Prepare Cross-Lagged data for the RICLPM.
# Load packages ----
# tidyverse supplies the %>% pipe and the dplyr verbs (filter, mutate, arrange,
# group_by, top_n, pull) used further down in this script.
library(tidyverse)
# Disable scientific notation in R (otherwise all the ids will be shown in
# scientific notation).
options(scipen = 999)
# Data import ----
# load() restores the objects stored in the .Rdata file into the workspace.
# The code below uses `liss_long` without defining it, so it is presumably
# one of the objects restored here.
load("data/data-processed/liss_merged/lisscdn_cleaned_V4_240624.Rdata")
# First we need a dataset from which the observations of respondents who did
# not participate in a survey have been removed. Counting the missing values
# of each observation is the first step.

# Count the missing values in a vector.
#   x: a vector (for instance one row of a data set).
#   Returns the number of NA elements in `x` as a single integer.
nafinder <- function(x) {
  sum(is.na(x))
}
# Count the NAs per observation (row).
# is.na() on a data frame returns a logical matrix, so rowSums() yields the
# per-row count in one vectorised step; this avoids apply(), which coerces the
# whole data frame to a matrix and calls an R function once for every row.
# as.integer() keeps the count an integer, as sum(is.na(x)) would return.
liss_long$number_na <- as.integer(rowSums(is.na(liss_long)))
# Frequency table of the number of missing values per observation.
table(liss_long$number_na)
     0     1     2     3     4     5     6     7     8     9    10    11    12
   276   615   708   719   638   884  1228  1355  1333  1135   944   709   509
    13    14    15    16    17    18    19    20    21    22    23    24    25
   364   226   153   254   343   436   327   296   449   607   667   641   498
    26    27    28    29    30    31    32    33    34    35    36    37    38
   448   317   201   142   103   295   406   400   417   308   699  1007  1096
    39    40    41    42    43    44    45    46    47    48    49    50    51
  1017   862   736   513   340   246   305   285   262   192   151   587   851
    52    53    54    55    56    57    58    59    60    61    62    63    64
   987   828   705   486   278   213   150   124   106    64    39   729   993
    65    66    67    68    69    70    71    72    73    74    75    76    77
   930   970   649   380   220   178   179   144   149   102  1635  1877  2032
    78    79    80    81    82    83    84    85    86    87    88    89    90
  1746   987   723   373   256   163   152   159   129    96    64    42    18
    91    92    93    94    95    96    97    98    99   100   101   102   103
    15     9    13     9     8     1   101   200   199   149   106    65    19
   104   105   106   107   108   109   110   111   112   113   114   115   116
    18     3    17    13    23   225  3603  4483  2813  1562   562   243     2
   119   120   121   122   130   131   132
     9    19     8    10     1    23 54031
# Drop the observations whose missing-value count reaches 132, which is the
# highest count in the frequency table above (54,031 observations).
# NOTE(review): an earlier version of this comment spoke of "126 missings on
# the 128 variables", which does not match the cut-off of 132 used in the
# code; confirm the intended threshold.
# NOTE(review): if survey_wave is stored as a factor, as.numeric() returns the
# level codes rather than the labels; confirm its type.
liss_long_na_selection <- liss_long %>%
  dplyr::filter(number_na < 132) %>%
  dplyr::mutate(survey_wave = as.numeric(survey_wave)) %>%
  dplyr::arrange(nomem_encr, survey_wave)

# Working copy used by the rest of the script.
data_long <- liss_long_na_selection
Age selection for egos. Selection = older than 24 years old (i.e., 25 or older).
# Author's note: this goes from 10,647 to 8,650 respondents, so roughly 2,000
# respondents are lost in this way.
#
# For every respondent (nomem_encr) take the row(s) with the lowest
# survey_wave and keep the ids whose age (leeftijd) in that wave is above 24.
# slice_min() replaces the superseded top_n(-1, ...); ties are kept, as before.
# Rows with a missing survey_wave are removed first because top_n() never
# selected them, whereas slice_min() could fall back to them when a respondent
# has no non-missing wave.
age_test <- data_long %>%
  filter(!is.na(survey_wave)) %>%
  group_by(nomem_encr) %>%
  slice_min(survey_wave, n = 1, with_ties = TRUE) %>%
  ungroup() %>%
  filter(leeftijd > 24) %>%
  pull(nomem_encr)

# Keep all observations of the respondents who are 25 or older (leeftijd > 24)
# in their lowest wave.
data_long <- data_long %>%
  filter(nomem_encr %in% age_test)
# Turn the result of the dplyr steps back into a plain data.frame and keep
# only the variables wanted for the analysis, in the order listed here.
# Alter variables come in sets of five columns, one per alter (1-5).
data_long <- data.frame(data_long)[, c(
  # respondent (ego) variables
  "nomem_encr", "oplmet", "leeftijd", "nettoink", "geslacht",
  "origin", "sted", "burgstat", "belbezig",
  # alter variables
  "origin_alter1", "origin_alter2", "origin_alter3", "origin_alter4", "origin_alter5",
  "rel_alter1", "rel_alter2", "rel_alter3", "rel_alter4", "rel_alter5",
  "educ_alter1", "educ_alter2", "educ_alter3", "educ_alter4", "educ_alter5",
  "poltalk_alter1", "poltalk_alter2", "poltalk_alter3", "poltalk_alter4", "poltalk_alter5",
  "talk_alter1", "talk_alter2", "talk_alter3", "talk_alter4", "talk_alter5",
  "gender_alter1", "gender_alter2", "gender_alter3", "gender_alter4", "gender_alter5",
  "rl_alter1", "rl_alter2", "rl_alter3", "rl_alter4", "rl_alter5",
  "age_alter1", "age_alter2", "age_alter3", "age_alter4", "age_alter5",
  # wave indicator and alter ids
  "survey_wave",
  "alter_id_1", "alter_id_2", "alter_id_3", "alter_id_4", "alter_id_5",
  # attitude items
  "immigrants", "eu_integration", "income_diff",
  # further alter variables
  "work_a1", "work_a2", "work_a3", "work_a4", "work_a5",
  "length_1", "length_2", "length_3", "length_4", "length_5"
)]
# Rename the variables to short names so that they can be told apart in Mplus
# (the stated aim is names of at most 8 characters).
# The assignment is positional: the i-th name below replaces the i-th column
# of data_long, so the order here has to match the column selection above.
# NOTE(review): several names are longer than 8 characters (e.g. nomem_encr,
# poltalk_a.1, alter_id.1, length_a.1); confirm that this is acceptable.
names(data_long) <- c(
  # respondent (ego) variables
  "nomem_encr", "educ", "age", "inc_ln", "gender",
  "origin", "urban", "burgstat", "belbezig",
  # alter variables (suffix .1 - .5 = alter number)
  "orig_a.1", "orig_a.2", "orig_a.3", "orig_a.4", "orig_a.5",
  "rel_a.1", "rel_a.2", "rel_a.3", "rel_a.4", "rel_a.5",
  "educ_a.1", "educ_a.2", "educ_a.3", "educ_a.4", "educ_a.5",
  "poltalk_a.1", "poltalk_a.2", "poltalk_a.3", "poltalk_a.4", "poltalk_a.5",
  "talk_a.1", "talk_a.2", "talk_a.3", "talk_a.4", "talk_a.5",
  "g_a.1", "g_a.2", "g_a.3", "g_a.4", "g_a.5",
  "rl_a.1", "rl_a.2", "rl_a.3", "rl_a.4", "rl_a.5",
  "age_a.1", "age_a.2", "age_a.3", "age_a.4", "age_a.5",
  # wave indicator and alter ids
  "wave",
  "alter_id.1", "alter_id.2", "alter_id.3", "alter_id.4", "alter_id.5",
  # attitude items
  "cult_inc", "eu", "inc_diff",
  # further alter variables
  "work_a.1", "work_a.2", "work_a.3", "work_a.4", "work_a.5",
  "length_a.1", "length_a.2", "length_a.3", "length_a.4", "length_a.5"
)
# Keep only the data frames that are needed: every object listed by ls() whose
# name is not in the vector below is removed from the workspace before saving.
rm(list = setdiff(
  ls(),
  c("data_long", "liss_long", "liss_wide", "sample", "sample_notchosen")
))

# Save the remaining workspace to disk.
save.image("data/data-processed/lisscdn_cl-ready_240816.Rdata")
Copyright © 2024 Jeroense Thijmen