Prepare cross-lagged data for the RI-CLPM (random-intercept cross-lagged panel model).
# Attach the packages used below.
library("tidyverse")
# Turn off scientific notation, so that the ids are not shown in scientific notation.
options(scipen = 999)
# Import the cleaned data.
# NOTE(review): presumably this file supplies liss_long, which is used below
# without being created in this script - confirm.
load(file = "data/data-processed/liss_merged/lisscdn_cleaned_V4_240624.Rdata")
# Step 1: build a dataset without the observations that did not take part in
# a survey.
# Helper: returns how many entries of x are missing (NA).
nafinder <- function(x) {
  length(which(is.na(x)))
}
# Count the missing values in every row of liss_long.
# rowSums(is.na(...)) is vectorised and works on the data frame directly,
# whereas apply(liss_long, 1, ...) first coerces the whole data frame to a
# matrix and then calls an R function once per row.
# as.integer() keeps the column an integer count.
liss_long$number_na <- as.integer(rowSums(is.na(liss_long)))
# Distribution of the per-row missing counts.
table(liss_long$number_na)
0 1 2 3 4 5 6 7 8 9 10 11 12

# Keep only the rows with fewer than 132 missing values, store survey_wave as
# a number, and order the rows by respondent id and wave.
# NOTE(review): the earlier comment here spoke of "126 missings on the 128
# variables", which does not match the cutoff of 132 used in the code -
# confirm the intended threshold.
# NOTE(review): if survey_wave is a factor, as.numeric() yields the level
# codes rather than the labels - confirm its type.
liss_long_na_selection <- arrange(
  mutate(
    filter(liss_long, number_na < 132),
    survey_wave = as.numeric(survey_wave)
  ),
  nomem_encr,
  survey_wave
)
# Working copy that the following steps operate on.
data_long <- liss_long_na_selection
Age selection for egos. Selection = older than 24 years old.
# Age selection: collect the ids of respondents whose age (leeftijd) is above
# 24 in the row(s) with their lowest survey_wave.
# Author's note: from 10,647 to 8650, so about 2000 respondents are lost here.
age_test <- data_long %>%
  dplyr::group_by(nomem_encr) %>%
  # n = -1 keeps, per respondent, the row(s) with the smallest survey_wave.
  dplyr::top_n(n = -1, wt = survey_wave) %>%
  dplyr::filter(leeftijd > 24) %>%
  dplyr::pull(var = nomem_encr)
# Retain all rows (every wave) of the respondents that passed the age check.
data_long <- data_long %>%
  dplyr::filter(nomem_encr %in% age_test)
# Convert the tbl back to a plain data.frame.
data_long <- data.frame(data_long)

# Variable selection and renaming.
# Each row below pairs a source column ("old") with the name used in the
# analysis ("new"). Keeping both in one table means the selection and the
# renaming cannot drift out of step, as two separate position-matched vectors
# can. The row order is the column order of the resulting data frame.
# NOTE(review): the original comment said the names must not be longer than 8
# characters (to tell them apart in Mplus), yet several new names are longer
# (e.g. "nomem_encr", "poltalk_a.1") - confirm.
# NOTE(review): "nettoink" becomes "inc_ln" although no log transform is
# applied in this script - confirm it is logged elsewhere.
var_map <- matrix(
  c(
    # old name,        new name
    "nomem_encr",      "nomem_encr",
    "oplmet",          "educ",
    "leeftijd",        "age",
    "nettoink",        "inc_ln",
    "geslacht",        "gender",
    "origin",          "origin",
    "sted",            "urban",
    "burgstat",        "burgstat",
    "belbezig",        "belbezig",
    "origin_alter1",   "orig_a.1",
    "origin_alter2",   "orig_a.2",
    "origin_alter3",   "orig_a.3",
    "origin_alter4",   "orig_a.4",
    "origin_alter5",   "orig_a.5",
    "rel_alter1",      "rel_a.1",
    "rel_alter2",      "rel_a.2",
    "rel_alter3",      "rel_a.3",
    "rel_alter4",      "rel_a.4",
    "rel_alter5",      "rel_a.5",
    "educ_alter1",     "educ_a.1",
    "educ_alter2",     "educ_a.2",
    "educ_alter3",     "educ_a.3",
    "educ_alter4",     "educ_a.4",
    "educ_alter5",     "educ_a.5",
    "poltalk_alter1",  "poltalk_a.1",
    "poltalk_alter2",  "poltalk_a.2",
    "poltalk_alter3",  "poltalk_a.3",
    "poltalk_alter4",  "poltalk_a.4",
    "poltalk_alter5",  "poltalk_a.5",
    "talk_alter1",     "talk_a.1",
    "talk_alter2",     "talk_a.2",
    "talk_alter3",     "talk_a.3",
    "talk_alter4",     "talk_a.4",
    "talk_alter5",     "talk_a.5",
    "gender_alter1",   "g_a.1",
    "gender_alter2",   "g_a.2",
    "gender_alter3",   "g_a.3",
    "gender_alter4",   "g_a.4",
    "gender_alter5",   "g_a.5",
    "rl_alter1",       "rl_a.1",
    "rl_alter2",       "rl_a.2",
    "rl_alter3",       "rl_a.3",
    "rl_alter4",       "rl_a.4",
    "rl_alter5",       "rl_a.5",
    "age_alter1",      "age_a.1",
    "age_alter2",      "age_a.2",
    "age_alter3",      "age_a.3",
    "age_alter4",      "age_a.4",
    "age_alter5",      "age_a.5",
    "survey_wave",     "wave",
    "alter_id_1",      "alter_id.1",
    "alter_id_2",      "alter_id.2",
    "alter_id_3",      "alter_id.3",
    "alter_id_4",      "alter_id.4",
    "alter_id_5",      "alter_id.5",
    "immigrants",      "cult_inc",
    "eu_integration",  "eu",
    "income_diff",     "inc_diff",
    "work_a1",         "work_a.1",
    "work_a2",         "work_a.2",
    "work_a3",         "work_a.3",
    "work_a4",         "work_a.4",
    "work_a5",         "work_a.5",
    "length_1",        "length_a.1",
    "length_2",        "length_a.2",
    "length_3",        "length_a.3",
    "length_4",        "length_a.4",
    "length_5",        "length_a.5"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("old", "new"))
)

# Fail with a readable message when an expected source column is absent.
if (!all(var_map[, "old"] %in% names(data_long))) {
  stop(
    "Columns missing from data_long: ",
    paste(setdiff(var_map[, "old"], names(data_long)), collapse = ", "),
    call. = FALSE
  )
}

# Select the variables for the analysis, in the order of the table above ...
data_long <- data_long[, var_map[, "old"]]
# ... and give them their analysis names.
names(data_long) <- var_map[, "new"]
# Remove every object from the workspace except the ones named here.
# NOTE(review): liss_wide, sample and sample_notchosen are not created in this
# script; presumably they come from the loaded .Rdata file - confirm.
rm(list = setdiff(
  ls(),
  c("data_long", "liss_long", "liss_wide", "sample", "sample_notchosen")
))
# Write the remaining workspace to disk.
save.image("data/data-processed/lisscdn_cl-ready_240816.Rdata")
Copyright © 2024 Jeroense Thijmen