Evaluate sensitivity to time-varying confounders
#library
library(tidyverse)
library(lavaan)
library(data.table)
library(doParallel)
library(parallel)
library(viridis)
library(fixest)
#data
load(file = "results/predicted_means/240816_pred-means-cleaned-df.Rdata")
Estimate effects of confidants educational attainment on political attitudes Estimate effects on confidants educational attainment.
#create a long file from df_combined
data <- pred_results$df_combined %>%
#select variables
select(nomem_encr,
matches("^educ_[[:digit:]]"),
matches("^Feduc_a_[[:digit:]]"),
matches("female_"),
matches("^age_[[:digit:]]"),
matches("burgstat_"),
matches("married_"),
matches("work_[[:digit:]]"),
matches("origin_"),
matches("eu_[[:digit:]]"),
matches("cult_[[:digit:]]"),
matches("inc_diff_[[:digit:]]"),
matches("inc_ln_")) %>%
#create long file
pivot_longer(cols = 2:ncol(.),
names_to = c("measure", "wave"),
values_to = "value",
names_pattern = "(.+)\\_(.+)") %>%
pivot_wider(names_from = "measure",
values_from = "value") %>%
mutate(wave = as.numeric(wave)) %>%
arrange(nomem_encr, wave)
#there are two respondents who change origin
#remove from datafile
outlier_origin <- data %>%
group_by(nomem_encr) %>%
summarise(sd_origin = sd(origin, na.rm = T),
sd_educ = sd(educ, na.rm = T)) %>%
filter(sd_origin != 0) %>%
pull(nomem_encr)
#there are some respondents who change education after 25th
#remove them from the data
outlier_education<- data %>%
group_by(nomem_encr) %>%
summarise(sd_origin = sd(origin, na.rm = T),
sd_educ = sd(educ, na.rm = T)) %>%
filter(sd_educ != 0) %>%
pull(nomem_encr)
#clean DF
data_cleaned <- data %>%
filter(!nomem_encr %in% outlier_education) %>%
filter(!nomem_encr %in% outlier_origin)
#add some backgroudn variables from LISS core file
load(file = "data/data-processed/liss_merged/liss_core_merged_V3_240624.Rdata")
#select variables from LISS core file
liss_selection <- liss_long %>%
select(nomem_encr,
sted,
woning,
survey_wave,
aantalki) %>%
rename(wave = survey_wave)
#add variables with a left join
data_cleaned <- data_cleaned %>%
left_join(liss_selection, by = c("nomem_encr", "wave"))
#listwise deletion (less problematic in long file)
data_cleaned <- data_cleaned %>%
na.omit()
# create function for estimating models
fe_pr_estimate <- function(x) {
df_selection <- x
#pooled effects
pr_model1 <- lm(dependent ~ 1 +
Feduc_a,
data = df_selection)
pr_model2 <- lm(dependent ~ 1 +
Feduc_a +
educ,
data = df_selection)
pr_model3 <- lm(
dependent ~ 1 +
Feduc_a +
educ +
age +
as.factor(married) +
as.factor(origin) +
as.factor(work) +
inc_ln +
sted +
aantalki,
data = df_selection
)
#store pr models
pr_models <- list(pr_model1 = pr_model1,
pr_model2 = pr_model2,
pr_model3 = pr_model3)
#Fixed effects
fe_model1 <- feols(dependent ~ 1 +
Feduc_a | nomem_encr,
data = df_selection) %>%
summary(., vcov = "twoway")
fe_model2 <- feols(dependent ~ 1 +
Feduc_a +
educ | nomem_encr,
data = df_selection) %>%
summary(., vcov = "twoway")
fe_model3 <- feols(
dependent ~ 1 +
Feduc_a +
educ +
age +
as.factor(married) +
as.factor(origin) +
as.factor(work) +
inc_ln +
sted +
aantalki | nomem_encr,
data = df_selection
) %>%
summary(., vcov = "twoway")
#store fe models
fe_models <- list(fe_model1 = fe_model1,
fe_model2 = fe_model2,
fe_model3 = fe_model3)
#store models
models <- list(pr_models = pr_models,
fe_models = fe_models)
return(models)
}
#apply function to data
#create list based on dep_var
data_list <- data_cleaned %>%
pivot_longer(cols = c("eu", "cult", "inc_diff"),
names_to = "dep_var",
values_to = "dependent") %>%
group_split(dep_var)
#store names of depvar
names_depvar <- c(unique(data_list[[1]]$dep_var),
unique(data_list[[2]]$dep_var),
unique(data_list[[3]]$dep_var))
#estimate models of depvar
re_fe_models_list <- foreach(a = 1:3) %do%{
fe_pr_estimate(data_list[[a]])
}
#extract model dfs
estimate_df <- foreach(c = 1:3,
.combine = rbind) %:% #a = 1
foreach(a = 1:2,
.combine = rbind) %:% #a = 1
foreach(b = 1:3,
.combine = rbind) %do% {
#b = 1
re_fe_models_list[[c]][[a]][[b]] %>%
broom::tidy() %>%
mutate(dep_var = names_depvar[[c]],
model = b,
model_type = a)
}
fe_model_educ_df <- feols(
Feduc_a ~ 1 +
educ +
age +
as.factor(married) +
as.factor(origin) +
as.factor(work) +
inc_ln +
sted +
aantalki | nomem_encr,
data = data_cleaned
) %>%
summary(., vcov = "twoway") %>%
broom::tidy() %>%
mutate(dep_var ="Feduc_a",
model = 3,
model_type = 2)
pr_model_educ_df <- lm(
Feduc_a ~ 1 +
educ +
age +
as.factor(married) +
as.factor(origin) +
as.factor(work) +
inc_ln +
sted +
aantalki,
data = data_cleaned
) %>%
broom::tidy() %>%
mutate(dep_var ="Feduc_a",
model = 3,
model_type = 1)
educ_df <- fe_model_educ_df %>%
bind_rows(pr_model_educ_df)
#Feduc effect plot after control
educ_control_plot <- estimate_df %>%
#select correct terms.
filter(term == "Feduc_a") %>%
filter(!(model_type == 2 & model == 2)) %>% #no model 2 for FE models
#create labels
mutate(
model_type = factor(
model_type,
levels = 1:2,
labels = c("Pooled regression",
"Fixed effects")
),
sig = ifelse(p.value < 0.05, 1, 0),
model = factor(
model,
levels = 1:3,
labels = c("No controls",
"Educ. ego \n control",
"All controls")
),
dep_var = case_when(dep_var == "cult" ~ 1,
dep_var == "eu" ~ 2,
dep_var == "inc_diff" ~ 3),
dep_var_fac = factor(
dep_var,
levels = 1:3,
labels = c("Cultural inclusion",
"EU-integration",
"Income equality")
),
term = case_when(
term == "sted" ~ 1,
term == "inc_ln" ~ 2,
term == "educ" ~ 3,
term == "as.factor(work)1" ~ 4,
term == "as.factor(origin)1" ~ 5,
term == "as.factor(married)1" ~ 6,
term == "age" ~ 7,
term == "aantalki" ~ 8
),
term_fac = factor(term,
levels = 1:8,
labels = c(
"Urbanisation",
"Income",
"Education (in years)",
"Works",
"Migration background",
"Married",
"Age",
"# Children"
))
) %>%
#create plot
ggplot(aes(y = estimate,
x = as.factor(model))) +
geom_hline(linetype = "dashed",
yintercept = 0, ) +
geom_linerange(aes(
ymin = estimate - std.error * 1.96,
ymax = estimate + std.error * 1.96
)) +
geom_point(aes(colour = as.factor(sig)),
size = 2) +
facet_grid(
rows = vars(dep_var_fac),
cols = vars(model_type),
scales = "free"
) +
scale_colour_viridis(discrete = T,
option = "D") +
coord_flip() +
theme(
panel.background = element_rect(fill = "#FFFFFF",
colour = "black"),
plot.background = element_rect(fill = "#FFFFFF",
colour = "black"),
panel.grid = element_line(colour = "grey"),
panel.grid.major.x = element_blank(),
text = element_text(family = "sans", size = 12),
strip.background = element_rect(fill = "#A9A9A9"),
panel.grid.minor = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.background = element_rect(fill = "#FFFFFF"),
legend.key = element_rect(fill = "#FFFFFF")
) +
labs(x = "",
y = "Estimate")
#save plot
ggsave(plot = educ_control_plot,
file = "plots/results/educ_control_plot.jpg",
dpi = 600, width = 6, height = 4)
full_control_plot <- estimate_df %>%
#combine estimates with Feduca estimates
bind_rows(educ_df) %>%
#filter out intercept and Feduc_a terms
filter((term != "(Intercept)") & (term != "Feduc_a")) %>%
filter(model == 3) %>% #only full models
#change labels
mutate(
model_type = factor(
model_type,
levels = 1:2,
labels = c("Pooled regression",
"Fixed effects")
),
sig = ifelse(p.value < 0.05, 1, 0),
model = factor(
model,
levels = 1:3,
labels = c("No controls",
"Educ. ego \n control",
"All controls")
),
dep_var = case_when(
dep_var == "cult" ~ 1,
dep_var == "eu" ~ 2,
dep_var == "inc_diff" ~ 3,
dep_var == "Feduc_a" ~ 4,
),
dep_var_fac = factor(
dep_var,
levels = 1:4,
labels = c(
"Cultural inclusion",
"EU-integration",
"Income equality",
"Confidants' \n education"
)
),
term = case_when(
term == "sted" ~ 1,
term == "inc_ln" ~ 2,
term == "educ" ~ 3,
term == "as.factor(work)1" ~ 4,
term == "as.factor(origin)1" ~ 5,
term == "as.factor(married)1" ~ 6,
term == "age" ~ 7,
term == "aantalki" ~ 8
),
term_fac = factor(
term,
levels = 1:8,
labels = c(
"Urbanisation",
"Income",
"Education (in years)",
"Works",
"Migration background",
"Married",
"Age",
"# Children"
)
)
) %>%
#create coef plot
ggplot(aes(
y = estimate,
x = term_fac,
group = as.factor(model)
)) +
geom_hline(linetype = "dashed",
yintercept = 0, ) +
geom_linerange(aes(
ymin = estimate - std.error * 1.96,
ymax = estimate + std.error * 1.96
)) +
geom_point(aes(colour = as.factor(sig)),
size = 2) +
facet_grid(
rows = vars(dep_var_fac),
cols = vars(model_type),
scales = "free"
) +
scale_colour_viridis(discrete = T,
option = "D") +
coord_flip() +
theme(
panel.background = element_rect(fill = "#FFFFFF",
colour = "black"),
plot.background = element_rect(fill = "#FFFFFF",
colour = "black"),
panel.grid = element_line(colour = "grey"),
panel.grid.major.x = element_blank(),
text = element_text(family = "sans", size = 12),
strip.background = element_rect(fill = "#A9A9A9"),
panel.grid.minor = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.background = element_rect(fill = "#FFFFFF"),
legend.key = element_rect(fill = "#FFFFFF")
) +
labs(x = "",
y = "Estimate")
#save plot
ggsave(plot = full_control_plot,
file = "plots/results/full_control_plot.jpg",
dpi = 600, width = 8, height = 6)
Copyright © 2024 Jeroense Thijmen