Preparing data
Libraries
# Attach every package named in `packages`; a package that cannot be attached
# is installed (with dependencies) and then attached.
# Returns a list with one element per package.
# (c) Jochem Tolsma
fpackage.check <- function(packages) {
  ensure_attached <- function(pkg) {
    # already available: nothing left to do for this package
    if (require(pkg, character.only = TRUE)) {
      return(NULL)
    }
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }
  lapply(packages, FUN = ensure_attached)
}
# Packages that are attached (and installed when missing) for this analysis.
packages <- c(
  "tidyverse",
  "kableExtra",
  "ggplot2",
  "patchwork",
  "foreach",
  "ggpattern"
)
fpackage.check(packages)
Import results and data
Import the NSUM data and recreate the NSUM module.
# Import the prepared NELLS NSUM file. The file carries an .rds extension but
# is read with load(), which restores the saved objects into the workspace.
# NOTE(review): presumably this provides `nells_nsum`, used further down — confirm.
load("data_analysis/data/data_processed/nells_data/2022-11-09_nells-nsum-prepped-data.rds")
Import the model estimates from the estimated NSUM models. I have
chosen the model which uses Ibrahim for the ethnic names.
# Load the combined NSUM estimates when they have been built before; otherwise
# rebuild them from the individual model files and store the result.
combined_file <-
  "data_analysis/results/nsum_output/main/combined_data/df_models_nsum_long.rds"

if (file.exists(combined_file)) {
  load(file = combined_file)
} else {
  list_files <- as.list(
    dir(
      "data_analysis/results/nsum_output/main/model/",
      full.names = TRUE
    )
  )
  # fail early with a clear message instead of looping over 1:0
  if (length(list_files) == 0) {
    stop(
      "No model files found in data_analysis/results/nsum_output/main/model/",
      call. = FALSE
    )
  }

  # one data frame of per-respondent estimates per model file
  list_df <- vector("list", length(list_files))
  for (i in seq_along(list_files)) {
    print(paste0("Number ", i, " of ", length(list_files)))
    # NOTE(review): each model file is expected to restore an object `degree`
    # with a matrix `d.values` (rows = respondents) — confirm.
    load(list_files[[i]])

    # the holdout number is taken from the second "_"-separated part of the
    # file name, starting at "estimates"
    strings <- str_split(
      str_extract(list_files[[i]][1], pattern = "estimates.+"),
      pattern = "_"
    )

    # Columns are named directly rather than relying on automatic V1/V2 names.
    # The row SD uses na.rm = TRUE so that it treats missing values the same
    # way as the row mean.
    list_df[[i]] <- data.frame(
      mean = unname(rowMeans(degree$d.values, na.rm = TRUE)),
      sd = unname(matrixStats::rowSds(degree$d.values, na.rm = TRUE)),
      id = nells_nsum$id, # add NELLS id variable
      holdout = as.numeric(
        str_extract(strings[[1]][2], pattern = "[[:digit:]]{1,}")
      ),
      row.names = NULL
    )
  }

  # combine results and save
  df_models_nsum_long <- bind_rows(list_df)
  dir.create(dirname(combined_file), recursive = TRUE, showWarnings = FALSE)
  save(df_models_nsum_long, file = combined_file)
}
We use Ibrahim as population for the size estimates, so let’s combine
the size estimates from holdout 10 with the other NSUM information.
# keep only the size estimates of holdout 10
size_selection <- dplyr::filter(df_models_nsum_long, holdout == 10)
# attach the NELLS respondent data to the network size estimates
nells_df <- left_join(size_selection, nells_nsum, by = "id")
Selection of respondents
We remove 32 observations whose estimated network size lies more than 3 SD
above the mean (only the upper tail is trimmed).
# Standardise the estimated network size, keep respondents with z below 3, and
# drop respondents whose migration background is "Other".
nells_df <- nells_df %>%
  mutate(
    mean_size = mean(mean, na.rm = TRUE),
    sd_size = sd(mean, na.rm = TRUE),
    z = (mean - mean_size) / sd_size
  ) %>%
  filter(
    z < 3,
    migration_background_fac != "Other"
  )
Describing network size
First of all, we show the density distribution of extended network size,
together with the median size of extended networks. These estimates are
in line with previous estimates of extended network size.
# Density of the estimated extended network size, with the median marked by a
# red vertical line and printed as a text label.
options(scipen = 999) # avoid scientific notation on the axes

# median is used for both the line and the label, so compute it once
median_size <- median(nells_df$mean, na.rm = TRUE)

size_density_plot <- ggplot(nells_df, aes(x = mean)) +
  geom_density(
    alpha = 0.4,
    colour = "black",
    fill = "grey"
  ) +
  geom_vline(
    xintercept = median_size,
    colour = "red"
  ) +
  annotate(
    "text",
    x = 1500,
    y = 0.0008,
    label = paste("Median:", as.character(round(median_size, 3))),
    colour = "black"
  ) +
  #facet_wrap(vars(migration_background_fac)) +
  scale_fill_viridis_d() +
  scale_color_viridis_d() +
  theme(
    panel.background = element_rect(fill = "#FFFFFF"),
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.grid = element_line(colour = "grey"),
    text = element_text(family = "sans", size = 12),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.text.x = element_text(),
    axis.line = element_blank(),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    axis.ticks = element_blank(),
    strip.background = element_rect(fill = "#A9A9A9"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "right",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  ) +
  labs(x = "Extended social network size", y = "Density")

# show plot
size_density_plot

# export plot; `filename` and `plot` are spelled out instead of relying on
# partial matching of `file` and on argument position
ggsave(
  filename = "data_analysis/plots/descriptive/density_network_size.jpg",
  plot = size_density_plot,
  width = 5,
  height = 4,
  dpi = 320
)
Group comparison
We present differences in extended network size between the migration-background groups.
Extended groups boxplot
# Recode migration background for plotting:
#  - reorder the detailed groups and give them their display labels,
#  - collapse them into three main groups (no generation distinction),
#  - derive the migrant generation (1 or 2) from the detailed label.
nells_df <- nells_df %>%
  mutate(
    migration_background_fac = fct_relevel(
      migration_background_fac,
      "Dutch",
      "1st gen Turkish",
      "2nd gen Turkish",
      "1st gen Moroccan",
      "2nd gen Moroccan"
    ),
    # the integer codes 1-5 of the reordered factor receive the display labels
    migration_background_fac = factor(
      as.numeric(migration_background_fac),
      levels = 1:5,
      labels = c(
        "Dutch Majority",
        "1st gen Turkish-Dutch",
        "2nd gen Turkish-Dutch",
        "1st gen Moroccan-Dutch",
        "2nd gen Moroccan-Dutch"
      )
    )
  ) %>%
  mutate(
    migration_background_simple_fac = factor(
      case_when(
        migration_background_fac == "Dutch Majority" ~ 1,
        migration_background_fac %in% c(
          "1st gen Turkish-Dutch",
          "2nd gen Turkish-Dutch"
        ) ~ 2,
        migration_background_fac %in% c(
          "1st gen Moroccan-Dutch",
          "2nd gen Moroccan-Dutch"
        ) ~ 3
      ),
      levels = 1:3,
      labels = c("Dutch Majority", "Turkish-Dutch", "Moroccan-Dutch")
    ),
    # stays NA for labels that contain neither "1st" nor "2nd"
    migrant_generation = case_when(
      str_detect(migration_background_fac, "1st") ~ 1,
      str_detect(migration_background_fac, "2nd") ~ 2
    )
  )
Boxplot
Create panel with complete groups (no generation distinction).
# custom palette: one colour per main group, in factor-level order
pal <- c("#66c2a5", "#fc8d62", "#8da0cb")

# horizontal boxplot of network size for the three main groups
boxplot_size_simple_groups <- ggplot(
  data = nells_df,
  mapping = aes(
    x = fct_rev(migration_background_simple_fac),
    y = mean,
    colour = migration_background_simple_fac,
    fill = migration_background_simple_fac
  )
) +
  geom_boxplot(alpha = 0.6) +
  coord_flip() +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  labs(y = "", x = "") +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF"),
    strip.background = element_rect(fill = "#A9A9A9"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
Create panel with generation distinction.
# custom palette: both generations of a group share the group's colour
pal <- c("#fc8d62", "#fc8d62", "#8da0cb", "#8da0cb")

# horizontal boxplot of network size per group and generation (minority groups
# only); the second generation is marked with a circle pattern
boxplot_size_extended_groups <- ggplot(
  data = filter(nells_df, migration_background_fac != "Dutch Majority"),
  mapping = aes(
    x = fct_rev(migration_background_fac),
    y = mean,
    colour = migration_background_fac,
    fill = migration_background_fac
  )
) +
  geom_boxplot_pattern(
    aes(pattern_density = as.factor(migrant_generation)),
    pattern = "circle",
    alpha = 0.6
  ) +
  coord_flip() +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  # generation 1: no pattern, generation 2: sparse circles
  scale_pattern_density_manual(values = c("1" = 0, "2" = 0.1)) +
  labs(y = "", x = "") +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF"),
    strip.background = element_rect(fill = "#A9A9A9"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
Combine panels in multipanel plot.
## Panel plot
# Stack the main-group boxplot above the generation boxplot, tag the panels
# "(a)" and "(b)", and suppress the legend in every panel.
hom_size_panel <- boxplot_size_simple_groups +
  boxplot_size_extended_groups +
  plot_annotation(
    tag_levels = "a",
    tag_prefix = "(",
    tag_suffix = ")"
  ) +
  plot_layout(
    ncol = 1,
    guides = "collect",
    heights = c(1, 2)
  ) &
  theme(legend.position = "none")

# show plot
hom_size_panel

# save plot; `filename` and `plot` are spelled out instead of relying on
# partial matching of `file` and on argument position
ggsave(
  filename = "data_analysis/plots/descriptive/size_plot_panel.jpg",
  plot = hom_size_panel,
  width = 8,
  height = 5,
  dpi = 320
)
Ethnic Homogeneity
Multipanel boxplot
Prepare data for ethnic homogeneity plot.
# Ethnic composition of the network, weighted by name frequency: every
# "knows_<name>_boundary" count is divided by a name-specific constant before
# the names are summed per ethnic group.
# NOTE(review): the divisors are presumably the population frequencies of the
# names — confirm against the source of these numbers.
nells_df <- nells_df %>%
  mutate(
    sum_dutch_w = knows_daan_boundary / 22704 +
      knows_kevin_boundary / 23167 +
      knows_emma_boundary / 18730 +
      knows_linda_boundary / 29955 +
      knows_albert_boundary / 31767 +
      knows_edwin_boundary / 21866 +
      knows_willemina_boundary / 17133 +
      knows_ingrid_boundary / 31323,
    sum_turkish_w = knows_ibrahim_boundary / 2099 +
      knows_esra_boundary / 1878,
    sum_moroccan_w = knows_mohammed_boundary / 13448 +
      knows_fatima_boundary / 2808,
    sum_total_w = sum_dutch_w + sum_turkish_w + sum_moroccan_w,
    # shares in percent; a respondent with sum_total_w == 0 gets NaN here
    per_dutch_w = (sum_dutch_w / sum_total_w) * 100,
    per_turkish_w = (sum_turkish_w / sum_total_w) * 100,
    per_moroccan_w = (sum_moroccan_w / sum_total_w) * 100,
    # assign the co-ethnic percentage that belongs to the respondent's group.
    # The former "Other" branch is dropped: that group is filtered out and is
    # not among the five factor labels, so the branch could never match.
    per_ingroup_w = case_when(
      str_detect(migration_background_fac, "kish") ~ per_turkish_w,
      str_detect(migration_background_fac, "occan") ~ per_moroccan_w,
      migration_background_fac == "Dutch Majority" ~ per_dutch_w
    )
  )
Create panel with complete groups (no generation distinction).
# custom palette: one colour per main group, in factor-level order
pal <- c("#66c2a5", "#fc8d62", "#8da0cb")

# horizontal boxplot of the co-ethnic percentage for the three main groups
boxplot_hom_simple_groups <- ggplot(
  data = nells_df,
  mapping = aes(
    x = fct_rev(migration_background_simple_fac),
    y = per_ingroup_w,
    colour = migration_background_simple_fac,
    fill = migration_background_simple_fac
  )
) +
  geom_boxplot(alpha = 0.6) +
  coord_flip() +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  labs(y = "", x = "") +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF"),
    strip.background = element_rect(fill = "#A9A9A9"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
Create panel with generation distinction.
# custom palette: both generations of a group share the group's colour
pal <- c("#fc8d62", "#fc8d62", "#8da0cb", "#8da0cb")

# horizontal boxplot of the co-ethnic percentage per group and generation
# (minority groups only); the second generation is marked with a circle pattern
boxplot_hom_extended_groups <- ggplot(
  data = filter(nells_df, migration_background_fac != "Dutch Majority"),
  mapping = aes(
    x = fct_rev(migration_background_fac),
    y = per_ingroup_w,
    fill = migration_background_fac,
    colour = migration_background_fac
  )
) +
  geom_boxplot_pattern(
    aes(pattern_density = as.factor(migrant_generation)),
    pattern = "circle",
    alpha = 0.6
  ) +
  coord_flip() +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  # generation 1: no pattern, generation 2: sparse circles
  scale_pattern_density_manual(values = c("1" = 0, "2" = 0.1)) +
  labs(y = "", x = "") +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF"),
    strip.background = element_rect(fill = "#A9A9A9"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
Create multipanel plot.
## Panel plot
# Stack the main-group boxplot above the generation boxplot, tag the panels
# "(a)" and "(b)", and suppress the legend in every panel.
hom_plot_panel <- boxplot_hom_simple_groups +
  boxplot_hom_extended_groups +
  plot_annotation(
    tag_levels = "a",
    tag_prefix = "(",
    tag_suffix = ")"
  ) +
  plot_layout(
    ncol = 1,
    guides = "collect",
    heights = c(1, 2)
  ) &
  theme(legend.position = "none")

# show plot
hom_plot_panel

# save plot; `filename` and `plot` are spelled out instead of relying on
# partial matching of `file` and on argument position
ggsave(
  filename = "data_analysis/plots/descriptive/hom_plot_panel.jpg",
  plot = hom_plot_panel,
  width = 8,
  height = 5,
  dpi = 320
)
Name differences
For every item X (for now, only the names) we estimate a negative binomial
(NB) regression to test for differences between migration backgrounds.
Please note: this does not take the frequency of each name in the
population into account. Differences between ethnic groups may therefore
be larger or smaller for different names.
# Fit one negative binomial regression per name item, with migration
# background as the only predictor, and collect the coefficients in one table.

# outcome variables, one model each
variable_names_model <- c(
  "knows_daan",
  "knows_kevin",
  "knows_edwin",
  "knows_albert",
  "knows_emma",
  "knows_linda",
  "knows_ingrid",
  "knows_willemina",
  "knows_mohammed",
  "knows_fatima",
  "knows_ibrahim",
  "knows_esra"
)

# fit the models; the result list is preallocated and indexed with
# seq_along() so an empty name vector simply yields an empty list
model_results <- vector("list", length(variable_names_model))
for (i in seq_along(variable_names_model)) {
  fm <- as.formula(
    paste(variable_names_model[[i]], "~", "migration_background_fac")
  )
  model_results[[i]] <- MASS::glm.nb(fm, data = nells_df)
}

# tidy every model and tag its rows with the outcome it belongs to
model_results_df_list <- purrr::map2(
  model_results,
  variable_names_model,
  function(model, outcome) {
    broom::tidy(model) %>%
      mutate(dep_var = outcome)
  }
)

# combine the model tables, then set readable term and outcome names
model_results_df <- model_results_df_list %>%
  bind_rows() %>%
  mutate(
    term = case_when(
      str_detect(term, "2nd gen Moroccan") ~ "2nd gen Moroccan-Dutch",
      str_detect(term, "2nd gen Turkish") ~ "2nd gen Turkish-Dutch",
      str_detect(term, "1st gen Moroccan") ~ "1st gen Moroccan-Dutch",
      str_detect(term, "1st gen Turkish") ~ "1st gen Turkish-Dutch",
      term == "(Intercept)" ~ "Intercept"
    ),
    # "knows_daan" -> "Daan"
    dep_var = str_to_title(
      str_replace(dep_var, pattern = "knows_", replacement = "")
    )
  )
Predicted counts plot for names and ethnicity
# Predicted counts (response scale) with standard errors from one NB model.
#
# nb_model: fitted model, e.g. model_results[[1]].
# names:    value stored in the `dep_var` column of the result,
#           e.g. variable_names_model[[1]].
# data:     data frame to predict for; it must contain `id`,
#           `migration_background_fac` and the model's predictors.
#           Defaults to `nells_df`, the data the models are fitted on.
#
# Returns the `id` and `migration_background_fac` columns of `data`, bound to
# the components returned by predict() (including `fit` and `se.fit`) and a
# `dep_var` column.
pred_nb_f <- function(nb_model, names, data = nells_df) {
  # Predict for `data` explicitly: the result then has one value per row of
  # `data`, also when the model dropped rows with a missing outcome, so the
  # column binding below cannot fail on unequal lengths.
  pred <- predict(
    object = nb_model,
    newdata = data,
    type = "response",
    se.fit = TRUE
  )
  data %>%
    select(id, migration_background_fac) %>%
    bind_cols(pred) %>%
    mutate(dep_var = names)
}
# predictions for every model, each tagged with the name of its outcome
model_pred_list <- map2(
  model_results,
  variable_names_model,
  function(model, outcome) {
    pred_nb_f(nb_model = model, names = outcome)
  }
)
model_pred_df <- bind_rows(model_pred_list)

# readable outcome names: "knows_daan" -> "Daan"
correct_names <- str_to_title(
  str_replace(model_pred_df$dep_var, pattern = "knows_", replacement = "")
)

# replace the old names by the readable ones
model_pred_df <- model_pred_df %>%
  mutate(dep_var = correct_names)
# custom palette: one colour for the majority, one per minority group
# (shared by both generations)
pal <- c("#66c2a5", "#fc8d62", "#fc8d62", "#8da0cb", "#8da0cb")

# predicted counts with 1.96 * SE ranges for the minority names,
# one facet per name with free scales
ethnic_names_pred_plot <- ggplot(
  data = filter(
    model_pred_df,
    dep_var %in% c("Mohammed", "Fatima", "Ibrahim", "Esra")
  ),
  mapping = aes(
    x = dep_var,
    y = fit,
    shape = migration_background_fac
  )
) +
  geom_linerange(
    aes(
      ymin = fit - (se.fit * 1.96),
      ymax = fit + (se.fit * 1.96)
    ),
    position = position_dodge(width = 1)
  ) +
  geom_point(
    aes(
      colour = migration_background_fac,
      fill = migration_background_fac
    ),
    position = position_dodge(width = 1)
  ) +
  facet_wrap(vars(dep_var), scales = "free", ncol = 2) +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  scale_shape_manual(values = c(21, 22, 24, 22, 24)) +
  labs(
    x = "",
    y = "",
    colour = "",
    shape = "",
    fill = ""
  ) +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF", colour = "black"),
    strip.background = element_rect(fill = "#FFFFFF"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
# custom palette: one colour for the majority, one per minority group
# (shared by both generations)
pal <- c("#66c2a5", "#fc8d62", "#fc8d62", "#8da0cb", "#8da0cb")

# predicted counts with 1.96 * SE ranges for the majority names,
# one facet per name; only the x scale is free
non_ethnic_names_pred_plot <- ggplot(
  data = filter(
    model_pred_df,
    !dep_var %in% c("Mohammed", "Fatima", "Ibrahim", "Esra")
  ),
  mapping = aes(
    x = dep_var,
    y = fit,
    shape = migration_background_fac
  )
) +
  geom_linerange(
    aes(
      ymin = fit - (se.fit * 1.96),
      ymax = fit + (se.fit * 1.96)
    ),
    position = position_dodge(width = 1)
  ) +
  geom_point(
    aes(
      colour = migration_background_fac,
      fill = migration_background_fac
    ),
    position = position_dodge(width = 1)
  ) +
  facet_wrap(vars(dep_var), scales = "free_x", ncol = 2) +
  scale_colour_manual(values = pal, aesthetics = c("colour", "fill")) +
  scale_shape_manual(values = c(21, 22, 24, 22, 24)) +
  labs(
    x = "",
    y = "",
    colour = "",
    shape = "",
    fill = ""
  ) +
  theme(
    # text
    text = element_text(family = "sans", size = 12),
    # backgrounds
    plot.background = element_rect(fill = "#FFFFFF"),
    panel.background = element_rect(fill = "#FFFFFF", colour = "black"),
    strip.background = element_rect(fill = "#FFFFFF"),
    # grid
    panel.grid = element_line(colour = "grey"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    # axes
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_text(hjust = 0.9, face = "bold"),
    axis.title.y = element_text(hjust = 0.9, face = "bold"),
    # legend
    legend.position = "none",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "#FFFFFF"),
    legend.key = element_rect(fill = "#FFFFFF")
  )
# Combine plots in multipanel plot: minority names on top, majority names
# below, tagged "(a)" and "(b)".
names_pred_het_panel <- ethnic_names_pred_plot +
  non_ethnic_names_pred_plot +
  plot_annotation(
    tag_levels = "a",
    tag_prefix = "(",
    tag_suffix = ")"
  ) +
  plot_layout(
    ncol = 1,
    heights = c(1, 3),
    guides = "collect"
  ) &
  # NOTE(review): the numeric legend position c(-2, -5) lies outside the
  # [0, 1] range, which presumably pushes the collected legend out of view —
  # confirm that hiding the legend is intended.
  theme(
    legend.position = c(-2, -5),
    legend.direction = "vertical"
  )

# preview plot
names_pred_het_panel

# save plot; `filename` and `plot` are spelled out instead of relying on
# partial matching of `file` and on argument position
ggsave(
  filename = "data_analysis/plots/descriptive/names_het_pred_panel.jpg",
  plot = names_pred_het_panel,
  width = 6,
  height = 8,
  dpi = 320
)
