Figure 2: A covariate balance plot shows the standardized mean difference between treatment and control groups for each measure used in our matching procedure before (unadjusted) and after (adjusted) matching. Our procedure effectively found a group of matched controls similar to the treated accounts along these measures.
Figure 3: Box and whisker plots visualize the distributions of our dependent variables within the blocked and blocking groups of user accounts and their matched controls before and after defederation. The lines correspond to the median, the boxes to the inter-quartile range (IQR), the whiskers to the range of the data within 1.5 * IQR, and the dots to data points outside the range of the whiskers.
Figure 5: A marginal effects plot visualizes results from our difference-in-differences analysis of account activity by week. We observe a discontinuous activity decrease among accounts on the blocked server that exceeds any decrease in the matched controls, but no corresponding change among accounts on the blocking server. The bands are 95% credible intervals that account for uncertainty in parameter estimates.
Figure 7: Median toxicity among accounts which posted each week for blocked and blocking user accounts. The median toxicity remained flat for all groups.
Beta regression coefficients drawn from the posterior of the parametric toxicity DiD model for user accounts whose server was defederated (blocked group) or whose server defederated another (blocking group). For all groups, the 95% credible intervals for a change in toxicity levels after treatment (\(\beta_1\), \(\beta_5\)) contain 0.
Colglazier, Carl, Nathan TeBlunthuis, and Aaron Shaw. 2024. “The Effects of Group Sanctions on Participation and Toxicity: Quasi-experimental Evidence from the Fediverse.”Proceedings of the International AAAI Conference on Web and Social Media 18 (May): 315–28. https://doi.org/10.1609/icwsm.v18i1.31316.
Source Code
---title: Defederation Notebook---This notebook takes the data from @colglazierEffectsGroupSanctions2024 and formats it for easy presentation across formats.```{r}#| label: setuplibrary(tidyverse)library(here)library(scales)source(here::here("code/common.R"))## plot stuffquantile_cl <-function(y, q=0.5, conf.level =0.95, na.rm=TRUE) { alpha <-1- conf.levelif (na.rm) y <- y[!is.na(y)] n <-length(y) l <-qbinom(alpha/2, size=n, prob = q) u <-1+ n - l ys <-sort.int(c(-Inf, y, Inf), partial =c(1+ l, 1+ u))data.frame(y =quantile(y, probs = q, na.rm=na.rm, type =8),ymin = ys[1+ l],ymax = ys[1+ u] )}median_cl <-function(y, conf.level=0.95, na.rm=TRUE) quantile_cl(y, q=0.5, conf.level=conf.level, na.rm=na.rm)``````{r}#| label: defed-datadata <-NULLplots <-NULLdata$metadata <-readRDS(here::here("data/defederation/metadata.rds"))data$model.alpha.data <-readRDS(here("data/defederation/model_activity_blocked_data.rds")) %>%mutate(blocking="Blocked")data$model.beta.data <-readRDS(here("data/defederation/model_activity_blocking_data.rds")) %>%mutate(blocking="Blocking")data$timeseries.byweek <-readRDS(here("data/defederation/timeseries_byweek.rds"))data$timeseries.byposttreatment <-readRDS(here("data/defederation/timeseries_byposttreatment.rds"))data$plotdf <-rbind( (data$model.alpha.data %>%select(id, selected, post_treatment, time, blocking, count)), (data$model.beta.data %>%select(id, selected, post_treatment, time, blocking, count))) %>%mutate(tgroup=dplyr::case_match(selected, 0~"Matched",1~"Treatment")) %>%mutate(treatment=post_treatment)data$balance.blocked <-readRDS(here("data/defederation/matches_blocked_bal_tab.rds"))data$balance.blocking <-readRDS(here("data/defederation/matches_blocking_bal_tab.rds"))as_tibble(data$plotdf)``````{r}#| label: fig-defed-timeline#| fig-height: 1.85#| fig-width: 3.5#| fig-cap: "**The y-axis shows the cumulative number of blocked and blocking accounts included in our analysis over our study period.**"plots$defed.timeline <- data$timeseries.byposttreatment %>%filter(selected) %>%filter(post_treatment==FALSE) %>%group_by(id) %>%slice(1) %>%group_by(block_type, treatment_date) %>%summarise(n =n()) %>%ungroup() %>%group_by(block_type) %>%mutate(cs=cumsum(n)) %>%ggplot(aes(y=cs, x=treatment_date, fill=block_type)) +geom_area(position ="identity", colour ="black", linewidth = .2, alpha=0.5) +#scale_fill_manual(values=c("grey80", "grey30")) + viridis::scale_fill_viridis(discrete=TRUE, option="mako") +theme(legend.title=element_blank()) +guides(linetype=guide_legend()) +scale_x_date(seq(as.Date("2021-04-01"), as.Date("2022-07-01"), by="6 months"), date_labels ="%b %Y") +theme(axis.text.x=element_text(angle=30, hjust=1), axis.title.x=element_blank()) +labs(y="Num. of accounts") +theme(legend.position="top")plots$defed.timeline ``````{r}#| label: fig-matching-love#| results: asis#| fig-height: 2.25#| fig-width: 3.5#| fig-cap: "A covariate balance plot shows the standardized mean difference between treatment and control groups for each measure used in our matching procedure before (unadjusted) and after (adjusted) matching. Our procedure effectively found a group of matched controls similar to the treated accounts along these measures."balance.blocked <-readRDS(here::here("data/defederation/matches_blocked_bal_tab.rds"))balance.blocking <-readRDS(here::here("data/defederation/matches_blocking_bal_tab.rds"))balances <-rbind( balance.blocked$Balance %>%mutate(group_type="Blocked") %>%rownames_to_column("Covariates"), balance.blocking$Balance %>%mutate(group_type="Blocking") %>%rownames_to_column("Covariates"))plots$love <- balances %>%select(Covariates, group_type, Diff.Un, Diff.Adj) %>%gather(key="Adjusted", value="value", Diff.Un, Diff.Adj) %>%mutate(Adjusted=case_when(Adjusted =="Diff.Un"~"Unadjusted", Adjusted =="Diff.Adj"~"Adjusted"),Covariates=case_when( Covariates =="log_count_sum"~"Post count", Covariates =="l_count_late"~"Post count (45)", Covariates =="log_replies"~"Replies", Covariates =="log_out_domains"~"Domains", Covariates =="log_user_count"~"Server accts.", Covariates =="treatment_date"~"Date", ) ) %>%ggplot(aes(x=Covariates, y=value, shape=Adjusted)) +geom_hline(yintercept =0, linetype ="dashed", color ="gray") +geom_point() +coord_flip() +facet_wrap(. ~ group_type, ncol=2) +scale_shape_manual(values=c(16, 2)) +theme( legend.title=element_blank(), legend.position="top", axis.title.y=element_blank(),axis.text.y =element_text(hjust=0),panel.spacing =unit(2, "lines"),legend.margin=margin(c(1,1,1,1)) ) +ylab("\n Standardized Mean Differences")plots$love``````{r}#| label: fig-ivs#| fig-height: 2#| fig-width: 3.5#| fig-cap: "Box and whisker plots visualize the distributions of our dependent variables within the blocked and blocking groups of user accounts and their matched controls before and after defederation. The lines correspond to the median, the boxes to the inter-quartile range (IQR), the whiskers to the range of the data within 1.5 * IQR, and the dots to data points outside the range of the whiskers."plots$ivs <- data$timeseries.byposttreatment %>%select(id, selected, post_treatment, toxicity, count) %>%mutate("Post count (log)"=log(count), "Toxicity"= toxicity) %>%select(id, selected, post_treatment, "Post count (log)", "Toxicity") %>%pivot_longer(cols =c("Post count (log)", "Toxicity"),names_to ="variable",values_to ="value" ) %>%mutate(group =paste(selected, post_treatment)) %>%mutate(g =factor(group)) %>%ggplot() +geom_boxplot(aes(y = value, group = g, fill = g)) +scale_fill_manual(values =viridis_pal(alpha =0.5, option ="viridis")(4),labels =c("Control (pre)", "Control (post)", "Treatment (pre)", "Treatment (post)") ) +facet_wrap(~ variable, scales ="free", ncol =1) +coord_flip() +theme(legend.title =element_blank(), axis.text.y =element_blank(), axis.ticks.y =element_blank(), axis.title.x =element_blank() )plots$ivs``````{r}#| label: fig-median-counts#| fig-width: 3.5#| fig-asp: 0.8active_byweek <- data$timeseries.byweek %>%mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%group_by(block_type, selected, time) %>%summarize(active=sum(count >0)/n(), .groups="drop") data$pdf <- data$plotdf %>%group_by(blocking, selected, time) %>%summarize(median=median(count), mean=mean(count), median_cl(count)) plots$median.counts <- data$pdf %>%mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, blocking))) +facet_wrap(. ~ blocking, ncol=1) +geom_line(aes(linetype=selected)) +geom_ribbon(aes(fill=selected), alpha=0.25) +scale_linetype_manual(values =c("Control"="dotted", "Treatment"="solid")) +geom_vline(xintercept =0, linetype="dashed", color="black") +expand_limits(y =0) +#scale_fill_manual(values=c("grey80", "grey30")) + viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +theme(legend.title=element_blank(),legend.position="bottom") +labs(x="Weeks since treatment", y="Median post count")plots$median.counts``````{r}#| label: wilcox-activity#| results: asisdf_bna <- data$timeseries.byposttreatmentrun_wilcox <-function(df, keys) { med <- df %>%group_by(post_treatment) %>%summarize(m=median(count)) %>%pull(m) %>% diff t <-wilcox.test(count ~ post_treatment, alternative="two.sided", data=df)return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))}run_wilcox_comp <-function(df, keys) { med <- df %>%group_by(selected) %>%summarize(m=median(count)) %>%pull(m) %>% diff t <-wilcox.test(count ~ selected, alternative="two.sided", data=df)return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))}table1 <- df_bna %>%arrange(block_type, desc(selected)) %>%group_by(block_type, selected) %>%group_map(~run_wilcox(.x, .y)) %>%bind_rows() %>%arrange(block_type, desc(selected)) %>%add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>%select(Group, median, W, p)join_pre_post <-function(df) {return(inner_join( (df %>%filter(post_treatment ==TRUE) %>%rename(post_count = count)), (df %>%filter(post_treatment ==FALSE) %>%rename(pre_count = count)),by=c("id", "block_type", "selected")))}pre_and_post_counts <- df_bna %>%select(id, block_type, selected, post_treatment, count) %>% as_tibble %>% join_pre_post %>%mutate(count = post_count - pre_count)median_change <- pre_and_post_counts %>%group_by(block_type, selected) %>%summarize(c=median(count), pre=median(pre_count)) %>%mutate(change=c/pre)table2 <- pre_and_post_counts %>%group_by(block_type) %>%group_map(~run_wilcox_comp(.x, .y)) %>%bind_rows() %>%add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>%select(Group, median, W, p)plots$tbl.wilcox.activity <-rbind(table1, table2) |> tinytable::tt() |>#tinytable::format_tt(j = 1, quarto = TRUE) |> tinytable::format_tt(j =4, digits =3, num_fmt ="decimal", num_zero = T, num_suffix = F) |> tinytable::format_tt(j =2:3, digits =1, num_fmt ="decimal", num_zero=T) |> tinytable::style_tt(i =5, line ="t", line_color ="black", line_width =0.1) |> tinytable::style_tt(j =2:4, align ="d")plots$tbl.wilcox.activity``````{r}#| label: tbl-did-activity-coefsdid_activity_df <-readRDS(here::here("data/defederation/table_activity.rds"))plots$tbl.activity.did <- did_activity_df %>%filter(block_type=="Blocked") %>%select(-block_type) %>%left_join( did_activity_df %>%filter(block_type=="Blocking") %>%select(-block_type),by =c("Term"="Term")) %>% tinytable::tt() %>%setNames(c("Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>% tinytable::group_tt(j =list("Blocked"=2:5, "Blocking"=6:9)) %>% tinytable::format_tt(digits =3, num_fmt ="decimal", num_zero = T, num_suffix = F) |> tinytable::format_tt(j =1, quarto =TRUE) |> tinytable::style_tt(j =1, align ="l") |> tinytable::theme_tt("resize", width =1.0)plots$tbl.activity.did``````{r}#| label: fig-activity-did#| results: asis#| fig-width: 3.5#| fig-asp: 0.78#| fig-cap: "A marginal effects plot visualizes results from our difference-in-differences analysis of account activity by week. We observe a discontinuous activity decrease among accounts on the blocked server that exceeds any decrease in the matched controls, but no corresponding change among accounts on the blocking server. The bands are 95% credible intervals that account for uncertainty in parameter estimates."draws.blocked <-readRDS(here::here("data/defederation/draws_activity_blocked.rds")) %>%mutate(block_type="Blocked")draws.blocking <-readRDS(here::here("data/defederation/draws_activity_blocking.rds")) %>%mutate(block_type="Blocking")draws <-rbind(draws.blocked, draws.blocking)plots$fig.activity.did <- draws %>%group_by(block_type, selected, post_treatment, time) %>%summarise(median=median(.value), low=quantile(.value, 0.025), high=quantile(.value, 0.975)) %>%mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%ggplot(., aes(x=time, y=median, group=interaction(selected, post_treatment))) +geom_line(aes(linetype=selected)) +geom_ribbon(aes(ymin=low, ymax=high, x=time, fill=selected), alpha=0.25) +scale_linetype_manual(values =c("Control"="dotted", "Treatment"="solid")) +facet_wrap(. ~ block_type,ncol=1) +#scale_fill_manual(values=c("grey80", "grey30")) + viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +theme(legend.title=element_blank(),legend.position="bottom") +ylab("Post count (log)") +xlab("Weeks since treatment")plots$fig.activity.did``````{r}#| label: fig-median-toxicity#| fig-width: 3.5#| fig-asp: 0.75plots$median.toxicity <- data$timeseries.byweek %>% tidyr::drop_na(toxicity) %>%group_by(block_type, selected, time) %>%summarize(median=median(toxicity), mean=mean(toxicity), median_cl(toxicity)) %>%mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%mutate(selected=factor(selected)) %>%ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, block_type))) +facet_wrap(. ~ block_type, ncol=1) +geom_line(aes(linetype=selected)) +geom_ribbon(aes(fill=selected), alpha=0.25) +scale_linetype_manual(values =c("Control"="dotted", "Treatment"="solid")) +geom_vline(xintercept =0, linetype="dashed", color="black") +expand_limits(y =0) +#scale_fill_manual(values=c("grey80", "grey30")) + viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +theme(legend.title=element_blank(), legend.position='bottom') +labs(x="Weeks since treatment", y="Median toxicity") +scale_y_continuous(breaks=breaks_pretty(n=3))plots$median.toxicity``````{r}#| label: fig-median-toxicity-week#| fig-width: 3.5#| fig-asp: 0.75#| echo: false#| fig-cap: "Median toxicity among accounts which posted each week for blocked and blocking user accounts. The median toxicity remained flat for all groups."plots$fig.median.toxicity.week <- data$timeseries.byweek %>%drop_na(toxicity) %>%group_by(block_type, selected, time) %>%summarize(median=median(toxicity), mean=mean(toxicity), median_cl(toxicity)) %>%mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%mutate(selected=factor(selected)) %>%ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, block_type))) +facet_wrap(. ~ block_type, ncol=1) +geom_line(aes(linetype=selected)) +geom_ribbon(aes(fill=selected), alpha=0.25) +scale_linetype_manual(values =c("Control"="dotted", "Treatment"="solid")) +geom_vline(xintercept =0, linetype="dashed", color="black") +expand_limits(y =0) +#scale_fill_manual(values=c("grey80", "grey30")) + viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +theme(legend.title=element_blank(), legend.position='bottom') +labs(x="Weeks since treatment", y="Median toxicity") +scale_y_continuous(breaks=breaks_pretty(n=3))plots$fig.median.toxicity.week``````{r}#| label: tbl-toxicity-wilcoxondf_bna <-semi_join( (data$timeseries.byposttreatment %>%drop_na(toxicity)), (data$timeseries.byposttreatment %>%drop_na(toxicity) %>%group_by(id) %>% count %>%filter(n==2)),by="id")run_wilcox <-function(df, keys) { med <- df %>%group_by(post_treatment) %>%summarize(m=median(toxicity)) %>%pull(m) %>% diff t <-wilcox.test(toxicity ~ post_treatment, alternative="two.sided", data=df)return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))}run_wilcox_comp <-function(df, keys) { med <- df %>%group_by(selected) %>%summarize(m=median(toxicity)) %>%pull(m) %>% diff t <-wilcox.test(toxicity ~ selected, alternative="two.sided", data=df)return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))}data$table1 <- df_bna %>%arrange(block_type, desc(selected)) %>%group_by(block_type, selected) %>%group_map(~run_wilcox(.x, .y)) %>%bind_rows() %>%arrange(block_type, desc(selected)) %>%add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>%select(Group, median, W, p)table2 <- df_bna %>%arrange(id) %>%group_by(block_type, selected) %>%reframe(toxicity = toxicity[post_treatment ==TRUE] - toxicity[post_treatment ==FALSE]) %>%group_by(block_type) %>%group_map(~run_wilcox_comp(.x, .y)) %>%bind_rows() %>%add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>%select(Group, median, W, p)plots$tbl.toxicity.wilcoxon <-rbind(data$table1, table2) |> tinytable::tt() |> tinytable::format_tt(j =1, quarto =TRUE) |> tinytable::format_tt(j =c(2, 4), digits =3, num_fmt ="decimal", num_zero = T, num_suffix = F) |> tinytable::format_tt(j =3, digits =1, num_fmt ="decimal", num_zero=T) |> tinytable::style_tt(i =5, line ="t", line_color ="black", line_width =0.1) |> tinytable::style_tt(j =2:4, align ="d")plots$tbl.toxicity.wilcoxon``````{r}#| label: tbl-did-toxicity#| fig-cap: Beta regression coefficients drawn from the posterior of the parametric toxicity DiD model for user accounts whose server was defederated (blocked group) or whose server defederated another (blocking group). For all groups, the 95% credible intervals for a change in toxicity levels after treatment ($\beta_1$, $\beta_5$) contain 0.#| fig-scap: "DiD toxicity coefficients"did_toxicity_df <-readRDS(here::here("data/defederation/table_toxicity.rds"))toxicity_df <- did_toxicity_df %>%select(selected, term, value, sd, .lower, .upper, block_type)plots$tbl.toxicity.did <- toxicity_df %>%filter(block_type=="Blocked") %>%select(-block_type) %>%left_join( toxicity_df %>%filter(block_type=="Blocking") %>%select(-block_type),by =c("term", "selected") ) %>%mutate(selected =ifelse(selected, "Control", "Treatment"))plots$tbl.toxicity.did |> tinytable::tt() %>%setNames(c("Group", "Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>% tinytable::group_tt(j =list("Blocked"=2:6, "Blocking"=6:9)) |> tinytable::format_tt(digits =3, num_fmt ="decimal", num_zero = T, num_suffix = F) |> tinytable::format_tt(j =2, quarto =TRUE) |> tinytable::style_tt(j =3:10, align ="d")``````{r}#| label: save-datasaveRDS(data, here::here("data/processed/defederation_data.rds"))saveRDS(plots, here::here("data/processed/defederation_plots.rds"))```