Goal

Check origin of CDN education changes.

Set up

#library
library(tidyverse)
library(lavaan)
library(data.table)
library(doParallel)
library(parallel)

#data
load("data/data-processed/ml_sem_data/240813_lisscdn-mlsem-panel-data-cleaned.Rdata")
load("data/data-processed/lisscdn_cl-ready_240624.Rdata")

Functions

#recode function for education alter
feducation_alter <- function(x) {
  x2 <- ifelse(x == 1, 4, x)
  x3 <- ifelse(x == 2, 6, x2)
  x4 <- ifelse(x == 3, 10, x3)
  x5 <- ifelse(x == 4, 11.5, x4)
  x6 <- ifelse(x == 5, 10.5, x5)
  x7 <- ifelse(x == 6, 15, x6)
  x8<- ifelse(x == 7, 16, x7)
  return(x8)
}

Data preperation

#create educ_test datafile
educ_test <- liss_long %>%
  select(nomem_encr,
         survey_wave,
         starts_with("alter_id"),
         starts_with("educ_alter")) %>%
  rename(
    educ_alter.1 = educ_alter1,
    educ_alter.2 = educ_alter2,
    educ_alter.3 = educ_alter3,
    educ_alter.4 = educ_alter4,
    educ_alter.5 = educ_alter5,
    alter_id.1 = alter_id_1,
    alter_id.2 = alter_id_2,
    alter_id.3 = alter_id_3,
    alter_id.4 = alter_id_4,
    alter_id.5 = alter_id_5
  ) %>%
  pivot_longer(
    3:12,
    names_to = c("name", "alter"),
    names_pattern = "(.+)\\.(.+)",
    values_to = "value"
  ) %>%
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(dyad_id = paste0(nomem_encr, alter_id),
         survey_wave = as.numeric(survey_wave)) %>%
  filter(!is.na(alter_id)) %>%
  select(!alter) %>% 
  arrange(nomem_encr, alter_id, survey_wave) %>% 
  mutate(educ_alter = feducation_alter(educ_alter))

#extract data that is used in this project
MyData <- mlsem_datafiles[[3]]
sample <- MyData$nomem_encr

#filter educ_test data
educ_test <- educ_test %>% 
  filter(nomem_encr %in% sample)

Check 1: Linear changes in education of alters

#check linear changes in education of alters
#create seperate lists
educ_test_df_list <- educ_test %>% 
  group_split(nomem_encr, alter_id)

#set up parallelization
numCores <- detectCores()
registerDoParallel(core = 6)

lm_educ_test <- foreach(
  a = 1:length(educ_test_df_list), #full run
  .packages = c("tidyverse", #packages needed in dopar
                "lavaan"),
  .combine = rbind, #used to combine dfs
  .errorhandling = "remove"
) %dopar% {
  #run lm and store results
  list_result <- educ_test_df_list[[a]] %>%
    lm(educ_alter ~ survey_wave,
                  data = .)
  #cleand results
  df_clean <- list_result %>%
    broom::tidy(.)
  
  return(df_clean)
}

#clean datafile with lm results
lm_educ_results <- lm_educ_test %>% 
  filter(term == "survey_wave") %>% 
  filter(!is.na(estimate)) %>% 
  filter(!is.na(std.error))

#create plot that shows changes in alters education
lm_educ_alter_plot <- lm_educ_results %>% 
  ggplot(aes(x = estimate)) +
  geom_histogram(binwidth = 0.1) +
  geom_vline(xintercept = mean(lm_educ_results$estimate),
             color = "red") +
  annotate("text",
           x = 1, 
           y = 1000,
           label = paste0("Mean: ", round(mean(lm_educ_results$estimate), 3)),
           size = 2) +
  theme_minimal() +
  labs(x = "Linear change estimate",
       y = "Count")

ggsave(lm_educ_alter_plot,
       file = "plots/rr/lm_educ_alter_plot.jpg",
       height = 4,
       width = 6,
       dpi = 600)

Check 2: Change in mean educational attainment for unstable and completely stable nettwoks.

#check stable alters and create stability of network
alters_long <- educ_test %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(rl_alter = ifelse(is.na(alter_id), NA, 1:n())) %>%
  ungroup() %>% 
  mutate(rl_alter = ifelse(rl_alter > 1, 0, rl_alter))

#create list to use in loop
list_alters <- alters_long %>% 
  arrange(nomem_encr, survey_wave) %>% 
  group_split(nomem_encr)

#crete
list_networks <- list()

for(j in 1:length(list_alters)) {
  #j = 1
  #outer loop, select networks
  list_test <- list_alters[[j]] %>%
    group_split(survey_wave)
  
  
  list_dfs <- list()
  #inner while loop. Create returning dfs
  i <- 1
  while (i < length(list_test)) {
    #i = 1
    returning <- c(list_test[[i]] %>%
                     pull(alter_id)) %in%
      c(list_test[[i + 1]] %>%
          pull(alter_id))
    
    list_dfs[[i]] <-  tibble(returning,
                                 alter_id = c(list_test[[i]] %>%
                                                pull(alter_id))) %>%
      mutate(
        returning = as.numeric(returning),
        survey_wave = list_test[[i]] %>%
          select(survey_wave) %>%
          distinct() %>%
          pull(survey_wave)
      )
    i <- i + 1
  }
  
  if(length(list_dfs) != 0){
  #combine dfs
  list_networks[[j]] <- list_dfs %>%
    bind_rows() %>%
    mutate(nomem_encr = list_alters[[j]] %>%
             select(nomem_encr) %>%
             distinct() %>%
             pull(nomem_encr)) %>%
    select(nomem_encr, alter_id, survey_wave, returning) %>% 
    bind_rows(list_test[[length(list_test)]] %>% 
      select(nomem_encr, alter_id, survey_wave) %>% 
      mutate(returning = 0)) #add last wave of data
  }
}
#Combine dfs
test_df <- list_networks %>% 
  bind_rows()

#create stability indicator for each alter in a network
test_df <- test_df %>% 
  arrange(nomem_encr, alter_id, survey_wave) %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(stable = ifelse(lag(returning == 1) | returning == 1, 1, 0),
         stable = ifelse(is.na(stable), 0, stable)) %>% 
  arrange(nomem_encr, survey_wave, alter_id)

#extract alter education data
educ_sel <- alters_long %>% 
  select(nomem_encr, survey_wave, alter_id, educ_alter)

#add alter education data
test_df <- test_df %>% 
  left_join(educ_sel, by = c("nomem_encr", "survey_wave", "alter_id"))

#create indicator whether alters have changed
changes_alter <- test_df %>% 
  filter(stable == 1) %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(change_alter_educ = ifelse((educ_alter - lag(educ_alter)) != 0, 1, 0),
         changes_alter_educ = educ_alter - lag(educ_alter)) %>% 
  ungroup()

#create df for network change
change_df <- test_df %>% 
  group_by(nomem_encr, survey_wave) %>% 
  summarise(net_size = n(),
         sum_stable = sum(stable,na.rm = T),
         net_educ = mean(educ_alter, na.rm =T)) %>% 
  ungroup() %>% 
  group_by(nomem_encr) %>% 
  mutate(change_educ = net_educ - lag(net_educ)) %>% 
  mutate(net_stability = (sum_stable/(net_size))*100, #percentage of network that stays
         net_change = abs(net_size - lag(net_size))) %>%  #changes in the network
  ungroup() %>% 
  mutate(complete_stable = ifelse(net_change == 0 & sum_stable == net_size, 1, 0)) #create indicator whether a network is complete stable (similar size and no internal changes)

#create plot
net_complstable_plot <- change_df %>% 
  mutate(changed_educ = ifelse(change_educ != 0, 1, 0)) %>%
  mutate(complete_stable = ifelse(net_change == 0 & sum_stable == net_size, 1, 0),
         complete_stable = factor(complete_stable,
                                  levels = 0:1,
                                  labels = c(
                                    "Unstable networks",
                                    "Complete stable networks")),
         changed_educ = factor(changed_educ,
                               levels = 0:1,
                               labels = c(
                                 "No change",
                                 "Changed"))) %>% 
  filter(!is.na(complete_stable)) %>%
  filter(!is.na(changed_educ)) %>%
  ggplot(aes(x = changed_educ)) +
  facet_wrap(vars(complete_stable)) +
  geom_bar(fill = "black",
               alpha = 0.4,
               na.rm = T) +
  theme_minimal() +
  labs(x = "Change in CDNs' educational attainment")

#save plot
ggsave(net_complstable_plot,
       file = "plots/rr/net_complstable_plot_plot.pdf",
       height = 4,
       width = 6,
       dpi = 600)

#Show plot
net_complstable_plot

---
title: "Robustness: Analysis of changes in CDN educational attainment"
author: "Thijmen Jeroense"
date: "Last compiled on `r format(Sys.time(), '%d %B, %Y')`"
output:
  html_document:
    toc: TRUE
    toc_depth: 3
    toc_float: TRUE
    code_folding: show
    code_download: TRUE
---
# Goal

Check origin of CDN education changes. 

# Set up

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  cache = TRUE,
  message = FALSE,
  warning = FALSE,
  results = "asis",
  fig.align = "center"
)
```

```{r libraries and data import}
#library
library(tidyverse)
library(lavaan)
library(data.table)
library(doParallel)
library(parallel)

#data
load("data/data-processed/ml_sem_data/240813_lisscdn-mlsem-panel-data-cleaned.Rdata")
load("data/data-processed/lisscdn_cl-ready_240624.Rdata")
```

# Functions

```{r functions}
#recode function for education alter
feducation_alter <- function(x) {
  x2 <- ifelse(x == 1, 4, x)
  x3 <- ifelse(x == 2, 6, x2)
  x4 <- ifelse(x == 3, 10, x3)
  x5 <- ifelse(x == 4, 11.5, x4)
  x6 <- ifelse(x == 5, 10.5, x5)
  x7 <- ifelse(x == 6, 15, x6)
  x8<- ifelse(x == 7, 16, x7)
  return(x8)
}

```

# Data preperation

```{r create education test file}
#create educ_test datafile
educ_test <- liss_long %>%
  select(nomem_encr,
         survey_wave,
         starts_with("alter_id"),
         starts_with("educ_alter")) %>%
  rename(
    educ_alter.1 = educ_alter1,
    educ_alter.2 = educ_alter2,
    educ_alter.3 = educ_alter3,
    educ_alter.4 = educ_alter4,
    educ_alter.5 = educ_alter5,
    alter_id.1 = alter_id_1,
    alter_id.2 = alter_id_2,
    alter_id.3 = alter_id_3,
    alter_id.4 = alter_id_4,
    alter_id.5 = alter_id_5
  ) %>%
  pivot_longer(
    3:12,
    names_to = c("name", "alter"),
    names_pattern = "(.+)\\.(.+)",
    values_to = "value"
  ) %>%
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(dyad_id = paste0(nomem_encr, alter_id),
         survey_wave = as.numeric(survey_wave)) %>%
  filter(!is.na(alter_id)) %>%
  select(!alter) %>% 
  arrange(nomem_encr, alter_id, survey_wave) %>% 
  mutate(educ_alter = feducation_alter(educ_alter))

#extract data that is used in this project
MyData <- mlsem_datafiles[[3]]
sample <- MyData$nomem_encr

#filter educ_test data
educ_test <- educ_test %>% 
  filter(nomem_encr %in% sample)

```

# Check 1: Linear changes in education of alters

```{r check linear changes}
#check linear changes in education of alters
#create seperate lists
educ_test_df_list <- educ_test %>% 
  group_split(nomem_encr, alter_id)

#set up parallelization
numCores <- detectCores()
registerDoParallel(core = 6)

lm_educ_test <- foreach(
  a = 1:length(educ_test_df_list), #full run
  .packages = c("tidyverse", #packages needed in dopar
                "lavaan"),
  .combine = rbind, #used to combine dfs
  .errorhandling = "remove"
) %dopar% {
  #run lm and store results
  list_result <- educ_test_df_list[[a]] %>%
    lm(educ_alter ~ survey_wave,
                  data = .)
  #cleand results
  df_clean <- list_result %>%
    broom::tidy(.)
  
  return(df_clean)
}

#clean datafile with lm results
lm_educ_results <- lm_educ_test %>% 
  filter(term == "survey_wave") %>% 
  filter(!is.na(estimate)) %>% 
  filter(!is.na(std.error))

#create plot that shows changes in alters education
lm_educ_alter_plot <- lm_educ_results %>% 
  ggplot(aes(x = estimate)) +
  geom_histogram(binwidth = 0.1) +
  geom_vline(xintercept = mean(lm_educ_results$estimate),
             color = "red") +
  annotate("text",
           x = 1, 
           y = 1000,
           label = paste0("Mean: ", round(mean(lm_educ_results$estimate), 3)),
           size = 2) +
  theme_minimal() +
  labs(x = "Linear change estimate",
       y = "Count")

ggsave(lm_educ_alter_plot,
       file = "plots/rr/lm_educ_alter_plot.jpg",
       height = 4,
       width = 6,
       dpi = 600)

```

# Check 2: Change in mean educational attainment for unstable and completely stable nettwoks.

```{r check stable alters}
#check stable alters and create stability of network
alters_long <- educ_test %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(rl_alter = ifelse(is.na(alter_id), NA, 1:n())) %>%
  ungroup() %>% 
  mutate(rl_alter = ifelse(rl_alter > 1, 0, rl_alter))

#create list to use in loop
list_alters <- alters_long %>% 
  arrange(nomem_encr, survey_wave) %>% 
  group_split(nomem_encr)

#crete
list_networks <- list()

for(j in 1:length(list_alters)) {
  #j = 1
  #outer loop, select networks
  list_test <- list_alters[[j]] %>%
    group_split(survey_wave)
  
  
  list_dfs <- list()
  #inner while loop. Create returning dfs
  i <- 1
  while (i < length(list_test)) {
    #i = 1
    returning <- c(list_test[[i]] %>%
                     pull(alter_id)) %in%
      c(list_test[[i + 1]] %>%
          pull(alter_id))
    
    list_dfs[[i]] <-  tibble(returning,
                                 alter_id = c(list_test[[i]] %>%
                                                pull(alter_id))) %>%
      mutate(
        returning = as.numeric(returning),
        survey_wave = list_test[[i]] %>%
          select(survey_wave) %>%
          distinct() %>%
          pull(survey_wave)
      )
    i <- i + 1
  }
  
  if(length(list_dfs) != 0){
  #combine dfs
  list_networks[[j]] <- list_dfs %>%
    bind_rows() %>%
    mutate(nomem_encr = list_alters[[j]] %>%
             select(nomem_encr) %>%
             distinct() %>%
             pull(nomem_encr)) %>%
    select(nomem_encr, alter_id, survey_wave, returning) %>% 
    bind_rows(list_test[[length(list_test)]] %>% 
      select(nomem_encr, alter_id, survey_wave) %>% 
      mutate(returning = 0)) #add last wave of data
  }
}
#Combine dfs
test_df <- list_networks %>% 
  bind_rows()

#create stability indicator for each alter in a network
test_df <- test_df %>% 
  arrange(nomem_encr, alter_id, survey_wave) %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(stable = ifelse(lag(returning == 1) | returning == 1, 1, 0),
         stable = ifelse(is.na(stable), 0, stable)) %>% 
  arrange(nomem_encr, survey_wave, alter_id)

#extract alter education data
educ_sel <- alters_long %>% 
  select(nomem_encr, survey_wave, alter_id, educ_alter)

#add alter education data
test_df <- test_df %>% 
  left_join(educ_sel, by = c("nomem_encr", "survey_wave", "alter_id"))

#create indicator whether alters have changed
changes_alter <- test_df %>% 
  filter(stable == 1) %>% 
  group_by(nomem_encr, alter_id) %>% 
  mutate(change_alter_educ = ifelse((educ_alter - lag(educ_alter)) != 0, 1, 0),
         changes_alter_educ = educ_alter - lag(educ_alter)) %>% 
  ungroup()

#create df for network change
change_df <- test_df %>% 
  group_by(nomem_encr, survey_wave) %>% 
  summarise(net_size = n(),
         sum_stable = sum(stable,na.rm = T),
         net_educ = mean(educ_alter, na.rm =T)) %>% 
  ungroup() %>% 
  group_by(nomem_encr) %>% 
  mutate(change_educ = net_educ - lag(net_educ)) %>% 
  mutate(net_stability = (sum_stable/(net_size))*100, #percentage of network that stays
         net_change = abs(net_size - lag(net_size))) %>%  #changes in the network
  ungroup() %>% 
  mutate(complete_stable = ifelse(net_change == 0 & sum_stable == net_size, 1, 0)) #create indicator whether a network is complete stable (similar size and no internal changes)

#create plot
net_complstable_plot <- change_df %>% 
  mutate(changed_educ = ifelse(change_educ != 0, 1, 0)) %>%
  mutate(complete_stable = ifelse(net_change == 0 & sum_stable == net_size, 1, 0),
         complete_stable = factor(complete_stable,
                                  levels = 0:1,
                                  labels = c(
                                    "Unstable networks",
                                    "Complete stable networks")),
         changed_educ = factor(changed_educ,
                               levels = 0:1,
                               labels = c(
                                 "No change",
                                 "Changed"))) %>% 
  filter(!is.na(complete_stable)) %>%
  filter(!is.na(changed_educ)) %>%
  ggplot(aes(x = changed_educ)) +
  facet_wrap(vars(complete_stable)) +
  geom_bar(fill = "black",
               alpha = 0.4,
               na.rm = T) +
  theme_minimal() +
  labs(x = "Change in CDNs' educational attainment")

#save plot
ggsave(net_complstable_plot,
       file = "plots/rr/net_complstable_plot_plot.pdf",
       height = 4,
       width = 6,
       dpi = 600)

#Show plot
net_complstable_plot
```




Copyright © 2024 Jeroense Thijmen