Federating the Social Web
  • Dissertation
  • Prospectus
  • Notebooks
    • Defederation
    • Newcomers
    • Rules
    • Other
  1. Defederation Notebook
  • Defederation Notebook
  • Newcomers Plots
  • Catch-all Notebook
  • Rules Study
    • Rules Study Notebook
    • Rules Memos

Defederation Notebook

  • Show All Code
  • Hide All Code

  • View Source

This notebook takes the data from Colglazier, TeBlunthuis, and Shaw (2024) and formats it for easy presentation across formats.

Code
library(tidyverse)
library(here)
library(scales)

source(here::here("code/common.R"))

## plot stuff
quantile_cl <- function(y, q=0.5, conf.level = 0.95, na.rm=TRUE) {
  alpha <- 1 - conf.level
  if (na.rm) y <- y[!is.na(y)]
  n <- length(y)
  l <- qbinom(alpha/2, size=n, prob = q)
  u <- 1 + n - l
  ys <- sort.int(c(-Inf, y, Inf), partial = c(1 + l, 1 + u))
  data.frame(
    y = quantile(y, probs = q, na.rm=na.rm, type = 8),
    ymin = ys[1 + l],
    ymax = ys[1 + u]
  )
}

median_cl <- function(y, conf.level=0.95, na.rm=TRUE) quantile_cl(y, q=0.5, conf.level=conf.level, na.rm=na.rm)
Code
data <- NULL
plots <- NULL

data$metadata <- readRDS(here::here("data/defederation/metadata.rds"))
data$model.alpha.data <- readRDS(here("data/defederation/model_activity_blocked_data.rds")) %>%
  mutate(blocking="Blocked")
data$model.beta.data <- readRDS(here("data/defederation/model_activity_blocking_data.rds")) %>%
  mutate(blocking="Blocking")
data$timeseries.byweek <- readRDS(here("data/defederation/timeseries_byweek.rds"))
data$timeseries.byposttreatment <- readRDS(here("data/defederation/timeseries_byposttreatment.rds"))

data$plotdf <- rbind(
  (data$model.alpha.data %>% select(id, selected, post_treatment, time, blocking, count)),
  (data$model.beta.data %>% select(id, selected, post_treatment, time, blocking, count))
) %>%
  mutate(tgroup=dplyr::case_match(selected, 0 ~ "Matched",1 ~ "Treatment")) %>%
  mutate(treatment=post_treatment)

data$balance.blocked <- readRDS(here("data/defederation/matches_blocked_bal_tab.rds"))
data$balance.blocking <- readRDS(here("data/defederation/matches_blocking_bal_tab.rds"))


as_tibble(data$plotdf)
# A tibble: 21,216 × 8
   id              selected post_treatment  time blocking count tgroup treatment
   <chr>           <lgl>    <lgl>          <int> <chr>    <int> <chr>  <lgl>    
 1 51337076349112… FALSE    FALSE            -12 Blocked     57 Match… FALSE    
 2 51337076349112… FALSE    FALSE            -11 Blocked     32 Match… FALSE    
 3 51337076349112… FALSE    FALSE            -10 Blocked     61 Match… FALSE    
 4 51337076349112… FALSE    FALSE             -9 Blocked     44 Match… FALSE    
 5 51337076349112… FALSE    FALSE             -8 Blocked     20 Match… FALSE    
 6 51337076349112… FALSE    FALSE             -7 Blocked     58 Match… FALSE    
 7 51337076349112… FALSE    FALSE             -6 Blocked     12 Match… FALSE    
 8 51337076349112… FALSE    FALSE             -5 Blocked     29 Match… FALSE    
 9 51337076349112… FALSE    FALSE             -4 Blocked     26 Match… FALSE    
10 51337076349112… FALSE    FALSE             -3 Blocked     48 Match… FALSE    
# ℹ 21,206 more rows
Code
plots$defed.timeline <- data$timeseries.byposttreatment %>%
    filter(selected) %>% filter(post_treatment==FALSE) %>%
    group_by(id) %>% slice(1) %>%
    group_by(block_type, treatment_date) %>% summarise(n = n()) %>% ungroup() %>%
    group_by(block_type) %>% mutate(cs=cumsum(n)) %>%
    ggplot(aes(y=cs, x=treatment_date, fill=block_type)) +
    geom_area(position = "identity", colour = "black", linewidth = .2, alpha=0.5) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="mako") +
    theme(legend.title=element_blank()) +
    guides(linetype=guide_legend()) +
    scale_x_date(seq(as.Date("2021-04-01"), as.Date("2022-07-01"), by="6 months"), date_labels =  "%b %Y") +
    theme(axis.text.x=element_text(angle=30, hjust=1), axis.title.x=element_blank()) +
    labs(y="Num. of accounts") +
    theme(legend.position="top")

plots$defed.timeline 
Figure 1: The y-axis shows the cumulative number of blocked and blocking accounts included in our analysis over our study period.
Code
balance.blocked <- readRDS(here::here("data/defederation/matches_blocked_bal_tab.rds"))
balance.blocking <- readRDS(here::here("data/defederation/matches_blocking_bal_tab.rds"))

balances <- rbind(
  balance.blocked$Balance %>% mutate(group_type="Blocked") %>% rownames_to_column("Covariates"),
  balance.blocking$Balance %>% mutate(group_type="Blocking") %>% rownames_to_column("Covariates")
)
  
plots$love <- balances %>% select(Covariates, group_type, Diff.Un, Diff.Adj) %>%
  gather(key="Adjusted", value="value", Diff.Un, Diff.Adj) %>%
  mutate(
    Adjusted=case_when(Adjusted == "Diff.Un" ~ "Unadjusted", Adjusted == "Diff.Adj" ~ "Adjusted"),
    Covariates=case_when(
      Covariates == "log_count_sum" ~ "Post count",
      Covariates == "l_count_late" ~ "Post count (45)",
      Covariates == "log_replies" ~ "Replies",
      Covariates == "log_out_domains" ~ "Domains",
      Covariates == "log_user_count" ~ "Server accts.",
      Covariates == "treatment_date" ~ "Date",
    )
  ) %>%
  ggplot(aes(x=Covariates, y=value, shape=Adjusted)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
    geom_point() +
    coord_flip() +
    facet_wrap(. ~ group_type, ncol=2) +
    scale_shape_manual(values=c(16, 2)) +
    theme( legend.title=element_blank(), legend.position="top", axis.title.y=element_blank(),
    axis.text.y = element_text(hjust=0),
    panel.spacing = unit(2, "lines"),
    legend.margin=margin(c(1,1,1,1))
    ) +
    ylab("\n Standardized Mean Differences")

plots$love
Figure 2: A covariate balance plot shows the standardized mean difference between treatment and control groups for each measure used in our matching procedure before (unadjusted) and after (adjusted) matching. Our procedure effectively found a group of matched controls similar to the treated accounts along these measures.
Code
plots$ivs <- data$timeseries.byposttreatment %>%
  select(id, selected, post_treatment, toxicity, count) %>%
  mutate("Post count (log)" = log(count), "Toxicity" = toxicity) %>%
  select(id, selected, post_treatment, "Post count (log)", "Toxicity") %>%
  pivot_longer(
    cols = c("Post count (log)", "Toxicity"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(group = paste(selected, post_treatment)) %>%
  mutate(g = factor(group)) %>%
  ggplot() +
  geom_boxplot(aes(y = value, group = g, fill = g)) + 
  scale_fill_manual(
    values = viridis_pal(alpha = 0.5, option = "viridis")(4),
    labels = c("Control (pre)", "Control (post)", 
               "Treatment (pre)", "Treatment (post)")
  ) +
  facet_wrap(~ variable, scales = "free", ncol = 1) +
  coord_flip() +
  theme(
    legend.title = element_blank(), 
    axis.text.y = element_blank(), 
    axis.ticks.y = element_blank(), 
    axis.title.x = element_blank()
  )
plots$ivs
Figure 3: Box and whisker plots visualize the distributions of our dependent variables within the blocked and blocking groups of user accounts and their matched controls before and after defederation. The lines correspond to the median, the boxes to the inter-quartile range (IQR), the whiskers to the range of the data within 1.5 * IQR, and the dots to data points outside the range of the whiskers.
Code
active_byweek <- data$timeseries.byweek %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    group_by(block_type, selected, time) %>%
    summarize(active=sum(count > 0)/n(), .groups="drop") 

data$pdf <- data$plotdf %>%
  group_by(blocking, selected, time) %>% summarize(median=median(count), mean=mean(count), median_cl(count)) 

plots$median.counts <- data$pdf %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, blocking))) +
    facet_wrap(. ~ blocking, ncol=1) +
    geom_line(aes(linetype=selected)) +
    geom_ribbon(aes(fill=selected), alpha=0.25) +
    scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid")) +
    geom_vline(xintercept = 0, linetype="dashed", color="black") +
    expand_limits(y = 0) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
    theme(legend.title= element_blank(),legend.position="bottom") +
    labs(x="Weeks since treatment", y="Median post count")

plots$median.counts
Figure 4
Code
df_bna <- data$timeseries.byposttreatment

run_wilcox <- function(df, keys) {
    med <- df %>% group_by(post_treatment) %>% summarize(m=median(count)) %>% pull(m) %>% diff
    t <- wilcox.test(count ~ post_treatment, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))
}

run_wilcox_comp <- function(df, keys) {
    med <- df %>% group_by(selected) %>% summarize(m=median(count)) %>% pull(m) %>% diff
    t <- wilcox.test(count ~ selected, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))
}

table1 <- df_bna %>% arrange(block_type, desc(selected)) %>%
    group_by(block_type, selected) %>% group_map(~ run_wilcox(.x, .y)) %>% bind_rows() %>%
    arrange(block_type, desc(selected)) %>% add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>% select(Group, median, W, p)

join_pre_post <- function(df) {
  return(
    inner_join(
      (df %>% filter(post_treatment == TRUE) %>% rename(post_count = count)),
      (df %>% filter(post_treatment == FALSE) %>% rename(pre_count = count)),
      by=c("id", "block_type", "selected")))
}

pre_and_post_counts <- df_bna %>% select(id, block_type, selected, post_treatment, count) %>% as_tibble %>% join_pre_post %>%
  mutate(count = post_count - pre_count)

median_change <- pre_and_post_counts %>% group_by(block_type, selected) %>% summarize(c=median(count), pre=median(pre_count)) %>% mutate(change=c/pre)

table2 <- pre_and_post_counts %>%
  group_by(block_type) %>% group_map(~ run_wilcox_comp(.x, .y)) %>% bind_rows() %>%
  add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>% select(Group, median, W, p)

plots$tbl.wilcox.activity <- rbind(table1, table2) |> 
  tinytable::tt() |>
  #tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::format_tt(j = 4, digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 2:3, digits = 1, num_fmt = "decimal", num_zero=T) |>
  tinytable::style_tt(i = 5, line = "t", line_color = "black", line_width = 0.1) |>
  tinytable::style_tt(j = 2:4, align = "d")

plots$tbl.wilcox.activity
Group median W p
$U_0$ -135.5 41197.5 0.000
$C_0$ -18.0 35762.0 0.143
$U_1$ -54.5 12413.0 0.122
$C_1$ -53.5 12520.0 0.091
$\Delta_0$ -39.0 39927.0 0.000
$\Delta_1$ 3.0 10645.5 0.421
Code
did_activity_df <- readRDS(here::here("data/defederation/table_activity.rds"))
plots$tbl.activity.did <- did_activity_df %>%
  filter(block_type=="Blocked") %>%
  select(-block_type) %>%
  left_join(
    did_activity_df %>% filter(block_type=="Blocking") %>% select(-block_type),
    by = c("Term"="Term")
) %>%
  tinytable::tt() %>%
  setNames(c("Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>%
  tinytable::group_tt(j = list("Blocked" = 2:5, "Blocking" = 6:9)) %>%
  tinytable::format_tt(digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::style_tt(j = 1, align = "l") |>
  tinytable::theme_tt("resize", width = 1.0)

plots$tbl.activity.did
Table 1
Blocked Blocking
Term Estimate Std. err. Low High Estimate Std. err. Low High
\(\beta_0\) (Intercept) 3.216 0.086 3.049 3.380 2.901 0.118 2.684 3.121
\(\beta_1\) Group -0.024 0.116 -0.255 0.198 0.023 0.177 -0.290 0.367
\(\beta_2\) Treatment -0.089 0.045 -0.178 0.002 -0.080 0.060 -0.201 0.034
\(\beta_3\) Time 0.006 0.004 -0.002 0.015 -0.002 0.006 -0.014 0.009
\(\beta_4\) Treatment : Time -0.026 0.006 -0.038 -0.014 -0.027 0.008 -0.042 -0.010
\(\beta_5\) Group : Treatment -0.241 0.065 -0.367 -0.116 -0.084 0.082 -0.247 0.072
\(\beta_6\) Group : Time -0.007 0.006 -0.020 0.004 0.006 0.008 -0.010 0.021
\(\beta_7\) Group : Treatment : Time -0.015 0.009 -0.031 0.002 0.009 0.011 -0.013 0.032
Code
draws.blocked <- readRDS(here::here("data/defederation/draws_activity_blocked.rds")) %>% mutate(block_type="Blocked")
draws.blocking <- readRDS(here::here("data/defederation/draws_activity_blocking.rds")) %>% mutate(block_type="Blocking")
draws <- rbind(draws.blocked, draws.blocking)
plots$fig.activity.did <- draws %>% group_by(block_type, selected, post_treatment, time) %>%
  summarise(median=median(.value), low=quantile(.value, 0.025), high=quantile(.value, 0.975)) %>%
  mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
  ggplot(., aes(x=time, y=median, group=interaction(selected, post_treatment))) +
  geom_line(aes(linetype=selected)) +
  geom_ribbon(aes(ymin=low, ymax=high, x=time, fill=selected), alpha=0.25) +
  scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid")) +
  facet_wrap(. ~ block_type,ncol=1) +
  #scale_fill_manual(values=c("grey80", "grey30")) +
  viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
  theme(legend.title= element_blank(),legend.position="bottom") +
  ylab("Post count (log)") +
  xlab("Weeks since treatment")
plots$fig.activity.did
Figure 5: A marginal effects plot visualizes results from our difference-in-differences analysis of account activity by week. We observe a discontinuous activity decrease among accounts on the blocked server that exceeds any decrease in the matched controls, but no corresponding change among accounts on the blocking server. The bands are 95% credible intervals that account for uncertainty in parameter estimates.
Code
plots$median.toxicity <- data$timeseries.byweek %>% tidyr::drop_na(toxicity) %>%
  group_by(block_type, selected, time) %>%
  summarize(median=median(toxicity), mean=mean(toxicity), median_cl(toxicity)) %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    mutate(selected=factor(selected)) %>%
    ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, block_type))) +
    facet_wrap(. ~ block_type, ncol=1) +
    geom_line(aes(linetype=selected)) +
    geom_ribbon(aes(fill=selected), alpha=0.25) +
    scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid"))  +
    geom_vline(xintercept = 0, linetype="dashed", color="black") +
    expand_limits(y = 0) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
    theme(legend.title= element_blank(), legend.position='bottom') +
    labs(x="Weeks since treatment", y="Median toxicity")  + scale_y_continuous(breaks=breaks_pretty(n=3))

plots$median.toxicity
Figure 6
Figure 7: Median toxicity among accounts which posted each week for blocked and blocking user accounts. The median toxicity remained flat for all groups.
Code
df_bna <- semi_join(
    (data$timeseries.byposttreatment %>% drop_na(toxicity)),
    (data$timeseries.byposttreatment %>% drop_na(toxicity) %>% group_by(id) %>% count %>% filter(n==2)),
    by="id"
)

run_wilcox <- function(df, keys) {
    med <- df %>% group_by(post_treatment) %>% summarize(m=median(toxicity)) %>% pull(m) %>% diff
    t <- wilcox.test(toxicity ~ post_treatment, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))
}

run_wilcox_comp <- function(df, keys) {
    med <- df %>% group_by(selected) %>% summarize(m=median(toxicity)) %>% pull(m) %>% diff
    t <- wilcox.test(toxicity ~ selected, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))
}

data$table1 <- df_bna %>% arrange(block_type, desc(selected)) %>%
    group_by(block_type, selected) %>% group_map(~ run_wilcox(.x, .y)) %>% bind_rows() %>%
    arrange(block_type, desc(selected)) %>% add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>% select(Group, median, W, p)
table2 <- df_bna %>% arrange(id) %>% group_by(block_type, selected) %>% 
    reframe(toxicity = toxicity[post_treatment == TRUE] - toxicity[post_treatment == FALSE]) %>%
    group_by(block_type) %>% group_map(~ run_wilcox_comp(.x, .y)) %>% bind_rows() %>%
    add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>% select(Group, median, W, p)

plots$tbl.toxicity.wilcoxon <- rbind(data$table1, table2) |> 
  tinytable::tt() |>
  tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::format_tt(j = c(2, 4), digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 3, digits = 1, num_fmt = "decimal", num_zero=T) |>
  tinytable::style_tt(i = 5, line = "t", line_color = "black", line_width = 0.1) |>
  tinytable::style_tt(j = 2:4, align = "d")

plots$tbl.toxicity.wilcoxon
Table 2
Group median W p
\(U_0\) -0.006 17746 0.538
\(C_0\) 0.004 14000 0.950
\(U_1\) -0.008 6514 0.619
\(C_1\) 0.001 5546 0.873
\(\Delta_0\) -0.005 17161 0.072
\(\Delta_1\) 0.000 6414 0.305
Code
did_toxicity_df <- readRDS(here::here("data/defederation/table_toxicity.rds"))
toxicity_df <- did_toxicity_df %>% select(selected, term, value, sd, .lower, .upper, block_type)
plots$tbl.toxicity.did <- toxicity_df %>%
  filter(block_type=="Blocked") %>%
  select(-block_type) %>%
  left_join(
  toxicity_df %>% filter(block_type=="Blocking") %>% select(-block_type),
    by = c("term", "selected")
  ) %>%
  mutate(selected = ifelse(selected, "Control", "Treatment"))
  
plots$tbl.toxicity.did |>
  tinytable::tt() %>%
  setNames(c("Group", "Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>%
  tinytable::group_tt(j = list("Blocked" = 2:6, "Blocking" = 6:9)) |>
  tinytable::format_tt(digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 2, quarto = TRUE) |>
  tinytable::style_tt(j = 3:10, align = "d")
Table 3
Blocked Blocking
Group Term Estimate Std. err. Low High Estimate Std. err. Low High
Treatment \(\beta_0\) (Intercept) -0.143 0.438 -1.197 0.763 -0.042 0.284 -0.754 0.518
Treatment \(\beta_1\) Treatment 0.002 0.006 -0.009 0.018 -0.001 0.010 -0.020 0.017
Treatment \(\beta_2\) Time 0.001 0.001 -0.000 0.002 0.001 0.001 -0.001 0.003
Treatment \(\beta_3\) Treatment : Time -0.002 0.001 -0.004 0.000 -0.001 0.002 -0.004 0.003
Control \(\beta_4\) (Intercept) 0.136 0.436 -0.907 1.044 0.048 0.283 -0.613 0.649
Control \(\beta_5\) Treatment -0.000 0.006 -0.016 0.011 0.004 0.009 -0.014 0.024
Control \(\beta_6\) Time 0.001 0.001 -0.000 0.002 0.003 0.001 0.001 0.005
Control \(\beta_7\) Treatment : Time -0.005 0.001 -0.007 -0.003 -0.006 0.002 -0.009 -0.002

Beta regression coefficients drawn from the posterior of the parametric toxicity DiD model for user accounts whose server was defederated (blocked group) or whose server defederated another (blocking group). For all groups, the 95% credible intervals for a change in toxicity levels after treatment (\(\beta_1\), \(\beta_5\)) contain 0.

Code
saveRDS(data, here::here("data/processed/defederation_data.rds"))
saveRDS(plots, here::here("data/processed/defederation_plots.rds"))

References

Colglazier, Carl, Nathan TeBlunthuis, and Aaron Shaw. 2024. “The Effects of Group Sanctions on Participation and Toxicity: Quasi-experimental Evidence from the Fediverse.” Proceedings of the International AAAI Conference on Web and Social Media 18 (May): 315–28. https://doi.org/10.1609/icwsm.v18i1.31316.
Source Code
---
title: Defederation Notebook
---

This notebook takes the data from @colglazierEffectsGroupSanctions2024 and formats it for easy presentation across formats.

```{r}
#| label: setup

library(tidyverse)
library(here)
library(scales)

source(here::here("code/common.R"))

## plot stuff
quantile_cl <- function(y, q=0.5, conf.level = 0.95, na.rm=TRUE) {
  alpha <- 1 - conf.level
  if (na.rm) y <- y[!is.na(y)]
  n <- length(y)
  l <- qbinom(alpha/2, size=n, prob = q)
  u <- 1 + n - l
  ys <- sort.int(c(-Inf, y, Inf), partial = c(1 + l, 1 + u))
  data.frame(
    y = quantile(y, probs = q, na.rm=na.rm, type = 8),
    ymin = ys[1 + l],
    ymax = ys[1 + u]
  )
}

median_cl <- function(y, conf.level=0.95, na.rm=TRUE) quantile_cl(y, q=0.5, conf.level=conf.level, na.rm=na.rm)
```

```{r}
#| label: defed-data

data <- NULL
plots <- NULL

data$metadata <- readRDS(here::here("data/defederation/metadata.rds"))
data$model.alpha.data <- readRDS(here("data/defederation/model_activity_blocked_data.rds")) %>%
  mutate(blocking="Blocked")
data$model.beta.data <- readRDS(here("data/defederation/model_activity_blocking_data.rds")) %>%
  mutate(blocking="Blocking")
data$timeseries.byweek <- readRDS(here("data/defederation/timeseries_byweek.rds"))
data$timeseries.byposttreatment <- readRDS(here("data/defederation/timeseries_byposttreatment.rds"))

data$plotdf <- rbind(
  (data$model.alpha.data %>% select(id, selected, post_treatment, time, blocking, count)),
  (data$model.beta.data %>% select(id, selected, post_treatment, time, blocking, count))
) %>%
  mutate(tgroup=dplyr::case_match(selected, 0 ~ "Matched",1 ~ "Treatment")) %>%
  mutate(treatment=post_treatment)

data$balance.blocked <- readRDS(here("data/defederation/matches_blocked_bal_tab.rds"))
data$balance.blocking <- readRDS(here("data/defederation/matches_blocking_bal_tab.rds"))


as_tibble(data$plotdf)
```

```{r}
#| label: fig-defed-timeline
#| fig-height: 1.85
#| fig-width: 3.5
#| fig-cap: "**The y-axis shows the cumulative number of blocked and blocking accounts included in our analysis over our study period.**"

plots$defed.timeline <- data$timeseries.byposttreatment %>%
    filter(selected) %>% filter(post_treatment==FALSE) %>%
    group_by(id) %>% slice(1) %>%
    group_by(block_type, treatment_date) %>% summarise(n = n()) %>% ungroup() %>%
    group_by(block_type) %>% mutate(cs=cumsum(n)) %>%
    ggplot(aes(y=cs, x=treatment_date, fill=block_type)) +
    geom_area(position = "identity", colour = "black", linewidth = .2, alpha=0.5) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="mako") +
    theme(legend.title=element_blank()) +
    guides(linetype=guide_legend()) +
    scale_x_date(seq(as.Date("2021-04-01"), as.Date("2022-07-01"), by="6 months"), date_labels =  "%b %Y") +
    theme(axis.text.x=element_text(angle=30, hjust=1), axis.title.x=element_blank()) +
    labs(y="Num. of accounts") +
    theme(legend.position="top")

plots$defed.timeline 
```

```{r}
#| label: fig-matching-love
#| results: asis
#| fig-height: 2.25
#| fig-width: 3.5
#| fig-cap: "A covariate balance plot shows the standardized mean difference between treatment and control groups for each measure used in our matching procedure before (unadjusted) and after (adjusted) matching. Our procedure effectively found a group of matched controls similar to the treated accounts along these measures."

balance.blocked <- readRDS(here::here("data/defederation/matches_blocked_bal_tab.rds"))
balance.blocking <- readRDS(here::here("data/defederation/matches_blocking_bal_tab.rds"))

balances <- rbind(
  balance.blocked$Balance %>% mutate(group_type="Blocked") %>% rownames_to_column("Covariates"),
  balance.blocking$Balance %>% mutate(group_type="Blocking") %>% rownames_to_column("Covariates")
)
  
plots$love <- balances %>% select(Covariates, group_type, Diff.Un, Diff.Adj) %>%
  gather(key="Adjusted", value="value", Diff.Un, Diff.Adj) %>%
  mutate(
    Adjusted=case_when(Adjusted == "Diff.Un" ~ "Unadjusted", Adjusted == "Diff.Adj" ~ "Adjusted"),
    Covariates=case_when(
      Covariates == "log_count_sum" ~ "Post count",
      Covariates == "l_count_late" ~ "Post count (45)",
      Covariates == "log_replies" ~ "Replies",
      Covariates == "log_out_domains" ~ "Domains",
      Covariates == "log_user_count" ~ "Server accts.",
      Covariates == "treatment_date" ~ "Date",
    )
  ) %>%
  ggplot(aes(x=Covariates, y=value, shape=Adjusted)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
    geom_point() +
    coord_flip() +
    facet_wrap(. ~ group_type, ncol=2) +
    scale_shape_manual(values=c(16, 2)) +
    theme( legend.title=element_blank(), legend.position="top", axis.title.y=element_blank(),
    axis.text.y = element_text(hjust=0),
    panel.spacing = unit(2, "lines"),
    legend.margin=margin(c(1,1,1,1))
    ) +
    ylab("\n Standardized Mean Differences")

plots$love
```

```{r}
#| label: fig-ivs
#| fig-height: 2
#| fig-width: 3.5
#| fig-cap: "Box and whisker plots visualize the distributions of our dependent variables within the blocked and blocking groups of user accounts and their matched controls before and after defederation. The lines correspond to the median, the boxes to the inter-quartile range (IQR), the whiskers to the range of the data within 1.5 * IQR, and the dots to data points outside the range of the whiskers."
plots$ivs <- data$timeseries.byposttreatment %>%
  select(id, selected, post_treatment, toxicity, count) %>%
  mutate("Post count (log)" = log(count), "Toxicity" = toxicity) %>%
  select(id, selected, post_treatment, "Post count (log)", "Toxicity") %>%
  pivot_longer(
    cols = c("Post count (log)", "Toxicity"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  mutate(group = paste(selected, post_treatment)) %>%
  mutate(g = factor(group)) %>%
  ggplot() +
  geom_boxplot(aes(y = value, group = g, fill = g)) + 
  scale_fill_manual(
    values = viridis_pal(alpha = 0.5, option = "viridis")(4),
    labels = c("Control (pre)", "Control (post)", 
               "Treatment (pre)", "Treatment (post)")
  ) +
  facet_wrap(~ variable, scales = "free", ncol = 1) +
  coord_flip() +
  theme(
    legend.title = element_blank(), 
    axis.text.y = element_blank(), 
    axis.ticks.y = element_blank(), 
    axis.title.x = element_blank()
  )
plots$ivs
```



```{r}
#| label: fig-median-counts
#| fig-width: 3.5
#| fig-asp: 0.8


active_byweek <- data$timeseries.byweek %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    group_by(block_type, selected, time) %>%
    summarize(active=sum(count > 0)/n(), .groups="drop") 

data$pdf <- data$plotdf %>%
  group_by(blocking, selected, time) %>% summarize(median=median(count), mean=mean(count), median_cl(count)) 

plots$median.counts <- data$pdf %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, blocking))) +
    facet_wrap(. ~ blocking, ncol=1) +
    geom_line(aes(linetype=selected)) +
    geom_ribbon(aes(fill=selected), alpha=0.25) +
    scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid")) +
    geom_vline(xintercept = 0, linetype="dashed", color="black") +
    expand_limits(y = 0) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
    theme(legend.title= element_blank(),legend.position="bottom") +
    labs(x="Weeks since treatment", y="Median post count")

plots$median.counts
```

```{r}
#| label: wilcox-activity
#| results: asis
df_bna <- data$timeseries.byposttreatment

run_wilcox <- function(df, keys) {
    med <- df %>% group_by(post_treatment) %>% summarize(m=median(count)) %>% pull(m) %>% diff
    t <- wilcox.test(count ~ post_treatment, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))
}

run_wilcox_comp <- function(df, keys) {
    med <- df %>% group_by(selected) %>% summarize(m=median(count)) %>% pull(m) %>% diff
    t <- wilcox.test(count ~ selected, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))
}

table1 <- df_bna %>% arrange(block_type, desc(selected)) %>%
    group_by(block_type, selected) %>% group_map(~ run_wilcox(.x, .y)) %>% bind_rows() %>%
    arrange(block_type, desc(selected)) %>% add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>% select(Group, median, W, p)

join_pre_post <- function(df) {
  return(
    inner_join(
      (df %>% filter(post_treatment == TRUE) %>% rename(post_count = count)),
      (df %>% filter(post_treatment == FALSE) %>% rename(pre_count = count)),
      by=c("id", "block_type", "selected")))
}

pre_and_post_counts <- df_bna %>% select(id, block_type, selected, post_treatment, count) %>% as_tibble %>% join_pre_post %>%
  mutate(count = post_count - pre_count)

median_change <- pre_and_post_counts %>% group_by(block_type, selected) %>% summarize(c=median(count), pre=median(pre_count)) %>% mutate(change=c/pre)

table2 <- pre_and_post_counts %>%
  group_by(block_type) %>% group_map(~ run_wilcox_comp(.x, .y)) %>% bind_rows() %>%
  add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>% select(Group, median, W, p)

plots$tbl.wilcox.activity <- rbind(table1, table2) |> 
  tinytable::tt() |>
  #tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::format_tt(j = 4, digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 2:3, digits = 1, num_fmt = "decimal", num_zero=T) |>
  tinytable::style_tt(i = 5, line = "t", line_color = "black", line_width = 0.1) |>
  tinytable::style_tt(j = 2:4, align = "d")

plots$tbl.wilcox.activity
```

```{r}
#| label: tbl-did-activity-coefs
did_activity_df <- readRDS(here::here("data/defederation/table_activity.rds"))
plots$tbl.activity.did <- did_activity_df %>%
  filter(block_type=="Blocked") %>%
  select(-block_type) %>%
  left_join(
    did_activity_df %>% filter(block_type=="Blocking") %>% select(-block_type),
    by = c("Term"="Term")
) %>%
  tinytable::tt() %>%
  setNames(c("Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>%
  tinytable::group_tt(j = list("Blocked" = 2:5, "Blocking" = 6:9)) %>%
  tinytable::format_tt(digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::style_tt(j = 1, align = "l") |>
  tinytable::theme_tt("resize", width = 1.0)

plots$tbl.activity.did
```

```{r}
#| label: fig-activity-did
#| results: asis
#| fig-width: 3.5
#| fig-asp: 0.78
#| fig-cap: "A marginal effects plot visualizes results from our difference-in-differences analysis of account activity by week. We observe a discontinuous activity decrease among accounts on the blocked server that exceeds any decrease in the matched controls, but no corresponding change among accounts on the blocking server. The bands are 95% credible intervals that account for uncertainty in parameter estimates."
draws.blocked <- readRDS(here::here("data/defederation/draws_activity_blocked.rds")) %>% mutate(block_type="Blocked")
draws.blocking <- readRDS(here::here("data/defederation/draws_activity_blocking.rds")) %>% mutate(block_type="Blocking")
draws <- rbind(draws.blocked, draws.blocking)
plots$fig.activity.did <- draws %>% group_by(block_type, selected, post_treatment, time) %>%
  summarise(median=median(.value), low=quantile(.value, 0.025), high=quantile(.value, 0.975)) %>%
  mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
  ggplot(., aes(x=time, y=median, group=interaction(selected, post_treatment))) +
  geom_line(aes(linetype=selected)) +
  geom_ribbon(aes(ymin=low, ymax=high, x=time, fill=selected), alpha=0.25) +
  scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid")) +
  facet_wrap(. ~ block_type,ncol=1) +
  #scale_fill_manual(values=c("grey80", "grey30")) +
  viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
  theme(legend.title= element_blank(),legend.position="bottom") +
  ylab("Post count (log)") +
  xlab("Weeks since treatment")
plots$fig.activity.did
```

```{r}
#| label: fig-median-toxicity
#| fig-width: 3.5
#| fig-asp: 0.75

plots$median.toxicity <- data$timeseries.byweek %>% tidyr::drop_na(toxicity) %>%
  group_by(block_type, selected, time) %>%
  summarize(median=median(toxicity), mean=mean(toxicity), median_cl(toxicity)) %>%
    mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
    mutate(selected=factor(selected)) %>%
    ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, block_type))) +
    facet_wrap(. ~ block_type, ncol=1) +
    geom_line(aes(linetype=selected)) +
    geom_ribbon(aes(fill=selected), alpha=0.25) +
    scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid"))  +
    geom_vline(xintercept = 0, linetype="dashed", color="black") +
    expand_limits(y = 0) +
    #scale_fill_manual(values=c("grey80", "grey30")) +
    viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
    theme(legend.title= element_blank(), legend.position='bottom') +
    labs(x="Weeks since treatment", y="Median toxicity")  + scale_y_continuous(breaks=breaks_pretty(n=3))

plots$median.toxicity
```

```{r}
#| label: fig-median-toxicity-week
#| fig-width: 3.5
#| fig-asp: 0.75
#| echo: false
#| fig-cap: "Median toxicity among accounts which posted each week for blocked and blocking user accounts. The median toxicity remained flat for all groups."
plots$fig.median.toxicity.week <- data$timeseries.byweek %>% drop_na(toxicity) %>%
  group_by(block_type, selected, time) %>% summarize(median=median(toxicity), mean=mean(toxicity), median_cl(toxicity)) %>%
  mutate(selected=ifelse(selected==TRUE, "Treatment", "Control")) %>%
  mutate(selected=factor(selected)) %>%
  ggplot(., aes(x=time, y=median, ymax=ymax, ymin=ymin, group=interaction(selected, block_type))) +
  facet_wrap(. ~ block_type, ncol=1) +
  geom_line(aes(linetype=selected)) +
  geom_ribbon(aes(fill=selected), alpha=0.25) +
  scale_linetype_manual(values = c("Control" = "dotted", "Treatment" = "solid"))  +
  geom_vline(xintercept = 0, linetype="dashed", color="black") +
  expand_limits(y = 0) +
  #scale_fill_manual(values=c("grey80", "grey30")) +
  viridis::scale_fill_viridis(discrete=TRUE, option="inferno", direction=-1) +
  theme(legend.title= element_blank(), legend.position='bottom') +
  labs(x="Weeks since treatment", y="Median toxicity")  + scale_y_continuous(breaks=breaks_pretty(n=3))
plots$fig.median.toxicity.week
```

```{r}
#| label: tbl-toxicity-wilcoxon

df_bna <- semi_join(
    (data$timeseries.byposttreatment %>% drop_na(toxicity)),
    (data$timeseries.byposttreatment %>% drop_na(toxicity) %>% group_by(id) %>% count %>% filter(n==2)),
    by="id"
)

run_wilcox <- function(df, keys) {
    med <- df %>% group_by(post_treatment) %>% summarize(m=median(toxicity)) %>% pull(m) %>% diff
    t <- wilcox.test(toxicity ~ post_treatment, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, selected=keys$selected, median=med, W=t$statistic, p=t$p.value))
}

run_wilcox_comp <- function(df, keys) {
    med <- df %>% group_by(selected) %>% summarize(m=median(toxicity)) %>% pull(m) %>% diff
    t <- wilcox.test(toxicity ~ selected, alternative="two.sided", data=df)
    return(data.frame(block_type=keys$block_type, median=med, W=t$statistic, p=t$p.value))
}

data$table1 <- df_bna %>% arrange(block_type, desc(selected)) %>%
    group_by(block_type, selected) %>% group_map(~ run_wilcox(.x, .y)) %>% bind_rows() %>%
    arrange(block_type, desc(selected)) %>% add_column(Group=c("$U_0$", "$C_0$", "$U_1$", "$C_1$")) %>% select(Group, median, W, p)
table2 <- df_bna %>% arrange(id) %>% group_by(block_type, selected) %>% 
    reframe(toxicity = toxicity[post_treatment == TRUE] - toxicity[post_treatment == FALSE]) %>%
    group_by(block_type) %>% group_map(~ run_wilcox_comp(.x, .y)) %>% bind_rows() %>%
    add_column(Group=c("$\\Delta_0$", "$\\Delta_1$")) %>% select(Group, median, W, p)

plots$tbl.toxicity.wilcoxon <- rbind(data$table1, table2) |> 
  tinytable::tt() |>
  tinytable::format_tt(j = 1, quarto = TRUE) |>
  tinytable::format_tt(j = c(2, 4), digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 3, digits = 1, num_fmt = "decimal", num_zero=T) |>
  tinytable::style_tt(i = 5, line = "t", line_color = "black", line_width = 0.1) |>
  tinytable::style_tt(j = 2:4, align = "d")

plots$tbl.toxicity.wilcoxon
```

```{r}
#| label: tbl-did-toxicity
#| fig-cap: Beta regression coefficients drawn from the posterior of the parametric toxicity DiD model for user accounts whose server was defederated (blocked group) or whose server defederated another (blocking group). For all groups, the 95% credible intervals for a change in toxicity levels after treatment ($\beta_1$, $\beta_5$) contain 0.
#| fig-scap: "DiD toxicity coefficients"
did_toxicity_df <- readRDS(here::here("data/defederation/table_toxicity.rds"))
toxicity_df <- did_toxicity_df %>% select(selected, term, value, sd, .lower, .upper, block_type)
plots$tbl.toxicity.did <- toxicity_df %>%
  filter(block_type=="Blocked") %>%
  select(-block_type) %>%
  left_join(
  toxicity_df %>% filter(block_type=="Blocking") %>% select(-block_type),
    by = c("term", "selected")
  ) %>%
  mutate(selected = ifelse(selected, "Control", "Treatment"))
  
plots$tbl.toxicity.did |>
  tinytable::tt() %>%
  setNames(c("Group", "Term", "Estimate", "Std. err.", "Low", "High", "Estimate", "Std. err.", "Low", "High")) %>%
  tinytable::group_tt(j = list("Blocked" = 2:6, "Blocking" = 6:9)) |>
  tinytable::format_tt(digits = 3, num_fmt = "decimal", num_zero = T, num_suffix = F) |>
  tinytable::format_tt(j = 2, quarto = TRUE) |>
  tinytable::style_tt(j = 3:10, align = "d")
```


```{r}
#| label: save-data

saveRDS(data, here::here("data/processed/defederation_data.rds"))
saveRDS(plots, here::here("data/processed/defederation_plots.rds"))
```