Clean alter data and remove faulty alter data.
# packages
library(tidyverse)

# load the merged LISS data
# NOTE(review): the code below uses `liss_long` (and later `liss_wide`), which
# are presumably provided by this .Rdata file -- confirm.
load(file = "data/data-processed/liss_merged/lisscdn_afterrel_V4_240624.Rdata")

# create check data ----
# Reshape the alter columns so that every ego x wave x alter slot is one row
# with the columns alter_id, gender_alter and rel_alter.

# Lookup of new name -> old name. Every alter column gets the form
# "<measure>.<slot>", so a single dot separates the measure from the slot.
alter_slots <- 1:5
alter_renames <- c(
  setNames(paste0("gender_alter", alter_slots),
           paste0("gender_alter.", alter_slots)),
  setNames(paste0("rel_alter", alter_slots),
           paste0("rel_alter.", alter_slots)),
  setNames(paste0("alter_id_", alter_slots),
           paste0("alter_id.", alter_slots))
)

alter_long <- liss_long %>%
  select(
    nomem_encr,
    survey_wave,
    starts_with("alter_id"),
    starts_with("rel_alter"),
    starts_with("gender_alter")
  ) %>%
  rename(all_of(alter_renames)) %>%
  # stack all alter columns; the column name is split on the dot into the
  # measure ("alter_id", "gender_alter", "rel_alter") and the alter slot
  pivot_longer(
    cols = c(
      starts_with("alter_id"),
      starts_with("gender"),
      starts_with("rel")
    ),
    names_pattern = "(.+)\\.(.+)",
    names_to = c("measure", "alter"),
    values_to = "value"
  ) %>%
  # spread the measures back out into one column each
  pivot_wider(
    names_from = measure,
    values_from = value
  )
# create dyad id and remove NAs on alter_id
# Result: one row per ego-alter pair per wave, sorted by dyad and wave.
# - The dyad id joins the ego id and the alter id with "_". Without a
#   separator, two different pairs could produce the same id whenever the ids
#   do not all have the same number of digits (e.g. 12 + 3 and 1 + 23).
# - Columns are selected by name rather than by position, so the result does
#   not depend on the column order produced by the reshaping step.
check_data <- alter_long %>%
  filter(!is.na(alter_id)) %>%
  mutate(dyad_id = paste(nomem_encr, alter_id, sep = "_")) %>%
  select(
    nomem_encr,
    dyad_id,
    alter_id,
    survey_wave,
    alter,
    gender_alter,
    rel_alter
  ) %>%
  arrange(dyad_id, survey_wave)
Check whether an alter switched gender. Although this is possible, it is highly unlikely.
# create mean of gender over time. If it is not exactly 1 or 2, the alter was
# reported with different gender codes across waves and we have a problem.
# mean() is called without na.rm, so a dyad with a missing gender gets an NA
# mean; the filter below then drops it instead of flagging it.
check_data_gender <- check_data %>%
  arrange(nomem_encr, dyad_id, survey_wave) %>%
  group_by(dyad_id) %>%
  mutate(mean_gender = mean(gender_alter)) %>%
  ungroup()

# we lose 461 respondents due to gender transitions
# The ids are pulled out as a plain vector, like faulty_dyads_ids. If they are
# left as a one-column tibble, `nomem_encr %in% gender_fault_ids` does not look
# inside the column and the flagged respondents are never removed.
gender_fault_ids <- check_data_gender %>%
  filter(mean_gender != 2 & mean_gender != 1) %>%
  distinct(nomem_encr) %>%
  pull(nomem_encr)
Check whether alters have weird and impossible relationship transformations.
# Build one transition code per row: the relationship code of the current row
# pasted in front of the code of the previous row of the same dyad. The first
# row of a dyad has no previous row, so its code ends in "NA".
# lag() follows the current row order; check_data is sorted by dyad_id and
# survey_wave when it is created.
check_data <- check_data %>%
  select(nomem_encr, dyad_id, survey_wave, rel_alter) %>%
  filter(!is.na(rel_alter)) %>%
  group_by(dyad_id) %>%
  mutate(rel_check = paste0(rel_alter, lag(rel_alter))) %>%
  ungroup()

# Impossible transition codes (current code followed by previous code).
# See the codebook for the meaning of the relationship codes.
# NOTE(review): 19 is not listed although 29, 39, 49 and 59 are -- confirm
# against the codebook that this is intentional.
impossible_combinations <- c(
  12, 13, 14, 15, 16, 17, 18, 110,
  21, 23, 24, 25, 26, 27, 28, 29, 210,
  31, 32, 34, 35, 36, 37, 38, 39, 310,
  41, 42, 43, 45, 46, 47, 48, 49, 410,
  51, 52, 53, 54, 56, 57, 58, 59, 510,
  62, 63, 64, 65,
  72, 73, 74, 75,
  82, 83, 84, 85,
  92, 93, 94, 95,
  102, 103, 104, 105
)

# we lose 1000 ego over impossible combinations
# Collect the ids of all respondents with at least one impossible transition.
faulty_dyads_ids <- check_data %>%
  filter(rel_check %in% impossible_combinations) %>%
  distinct(nomem_encr) %>%
  pull(nomem_encr)
Filter networks with impossible gender or relationship transformations.
# Remove every respondent flagged for an impossible relationship transition or
# an inconsistent alter gender from both the long and the wide file.

# unlist() flattens the gender ids to a plain vector, so the membership test
# works whether they arrive as a vector or as a one-column data frame.
# `%in%` against a data frame does not look inside its columns, so without
# this step the gender filter would silently remove nobody.
gender_fault_vec <- unlist(gender_fault_ids, use.names = FALSE)

# filter out from long file
liss_long <- liss_long %>%
  filter(!nomem_encr %in% faulty_dyads_ids) %>% # impossible relationship changes
  filter(!nomem_encr %in% gender_fault_vec)     # inconsistent alter gender

# filter out from wide file
liss_wide <- liss_wide %>%
  filter(!nomem_encr %in% faulty_dyads_ids) %>% # impossible relationship changes
  filter(!nomem_encr %in% gender_fault_vec)     # inconsistent alter gender

# clean global environment.
# only keep the wide and long files.
rm(list = setdiff(ls(), c("liss_wide", "liss_long")))

# save data
# save.image() writes the objects left in the workspace after the cleanup.
save.image(file = "data/data-processed/liss_merged/lisscdn_cleaned_V4_240624.Rdata")
Copyright © 2024 Jeroense Thijmen