Data import of the different LISS modules
# Output directory for the per-module merged files. The name `dir` is kept
# because the later chunks build their cache file paths from it.
dir <- file.path(
  "data",
  "data-processed",
  "liss_merged",
  "modules"
)

# Create the directory when it is missing. `recursive = TRUE` also creates any
# missing parent folders; without it dir.create() only warns and returns FALSE
# when a parent of this nested path does not exist yet.
if (!dir.exists(dir)) {
  dir.create(dir, recursive = TRUE)
}
LISS leisure and integration module
# Import the leisure and integration files and merge them into one data frame.

# Path of the cached merged file for this module.
file_name <- file.path(dir, "leisure_integration_combined.Rdata")

# Rebuild from the raw files only when no cached merge exists yet.
if (!file.exists(file_name)) {
  # Read every file found in the raw-data folder.
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  liss_files <- list.files(
    "data/data-raw/leisure_integration/",
    full.names = TRUE
  ) %>%
    map(read_sav)

  # Full outer join of all files on nomem_encr, then sort on that key.
  liss_merged <- liss_files %>%
    reduce(full_join, by = "nomem_encr") %>%
    arrange(nomem_encr)

  # Cache the merged data so later runs can skip the import.
  save(liss_merged, file = file_name)
} else {
  # Restore the objects stored in the cache file (liss_merged when the cache
  # was written by the branch above).
  load(file_name)
}
This results in a df with 14473 observations and 4909 variables.
Politics and values module
# Import the politics and values files and merge them into one data frame.

# Path of the cached merged file for this module.
file_name <- file.path(dir, "polval_combined.Rdata")

# Rebuild from the raw files only when no cached merge exists yet.
if (!file.exists(file_name)) {
  # Read every file found in the raw-data folder.
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  liss_files <- list.files(
    "data/data-raw/pol_values_data/",
    full.names = TRUE
  ) %>%
    map(read_sav)

  # Full outer join of all files on nomem_encr, then sort on that key.
  liss_pol <- liss_files %>%
    reduce(full_join, by = "nomem_encr") %>%
    arrange(nomem_encr)

  # Cache the merged data so later runs can skip the import.
  save(liss_pol, file = file_name)
} else {
  # Restore the objects stored in the cache file (liss_pol when the cache was
  # written by the branch above).
  load(file_name)
}
This results in a df with 14040 observations and 1978 variables.
# Drop the household id columns from liss_pol and liss_merged.
# They are incomplete because some waves do not contain them; the household id
# is added back later from the background data.
household_id_cols <- c(
  "nohouse_encr",
  "nohouse_encr.x",
  "nohouse_encr.y",
  "nohouse_encr.x.x",
  "nohouse_encr.y.y"
)
liss_pol[, household_id_cols] <- NULL
liss_merged[, household_id_cols] <- NULL

# Remove the temporary helper so the workspace holds the same objects as before.
rm(household_id_cols)
LISS CDN alter id
# Import the CDN alter id files and merge them into one data frame.

# Path of the cached merged file for this module.
file_name <- file.path(dir, "alterids_combined.Rdata")

# Rebuild from the raw files only when no cached merge exists yet.
if (!file.exists(file_name)) {
  # Read every file found in the raw-data folder.
  liss_files <- list.files("data/data-raw/alter_ids/", full.names = TRUE) %>%
    map(read_sav)

  # Suffix every variable except the join key with the position of its file,
  # so the source file of each variable stays identifiable after the merge
  # (otherwise the joins would add arbitrary .x / .y suffixes).
  # seq_along() replaces the hard-coded 1:11, which errors when fewer files are
  # present and silently drops any file beyond the eleventh.
  # NOTE(review): the suffix is the position in list.files() order; presumably
  # that matches the wave order - verify against the raw file names.
  liss_files <- lapply(seq_along(liss_files), function(i) {
    data <- liss_files[[i]]
    # Select by name instead of assuming the key is the first column.
    is_key <- names(data) == "nomem_encr"
    names(data)[!is_key] <- paste0(names(data)[!is_key], ".", i)
    data
  })

  # Full outer join of all files on nomem_encr, then sort on that key.
  liss_alter <- liss_files %>%
    reduce(full_join, by = "nomem_encr") %>%
    arrange(nomem_encr)

  # Cache the merged data so later runs can skip the import.
  save(liss_alter, file = file_name)
} else {
  # Restore the objects stored in the cache file (liss_alter when the cache
  # was written by the branch above).
  load(file_name)
}
LISS background
# Import the background variable files and merge them into one data frame.

# Path of the cached merged file for this module.
file_name <- file.path(dir, "background_combined.Rdata")

# Rebuild from the raw files only when no cached merge exists yet.
if (!file.exists(file_name)) {
  # Read every file found in the raw-data folder.
  liss_files <- list.files("data/data-raw/background/", full.names = TRUE) %>%
    map(read_sav)

  # Sort each file on nomem_encr and suffix every variable except the join key
  # with the position of its file, so the source file of each variable stays
  # identifiable after the merge.
  # seq_along() replaces the hard-coded 1:11, which errors when fewer files are
  # present and silently drops any file beyond the eleventh.
  # NOTE(review): the suffix is the position in list.files() order; presumably
  # that matches the wave order - verify against the raw file names.
  liss_files <- lapply(seq_along(liss_files), function(i) {
    data <- liss_files[[i]][base::order(liss_files[[i]]$nomem_encr), ]
    # Select by name instead of assuming the key is the first column.
    is_key <- names(data) == "nomem_encr"
    names(data)[!is_key] <- paste0(names(data)[!is_key], ".", i)
    data
  })

  # Full outer join of all files on nomem_encr, then sort on that key.
  liss_background <- liss_files %>%
    reduce(full_join, by = "nomem_encr") %>%
    arrange(nomem_encr)

  # Cache the merged data so later runs can skip the import.
  save(liss_background, file = file_name)
} else {
  # Braces instead of parentheses: `else (load(...))` made the invisible result
  # of load() visible, which printed the loaded object name into the report.
  load(file_name)
}
[1] “liss_background”
Merge all files
# Combine the four module data frames (leisure/integration, alter ids,
# politics and values, background) into one.
# Each step is a full outer join on nomem_encr, applied left to right in the
# order of the list; the result is sorted on nomem_encr.
liss <- list(liss_merged, liss_alter, liss_pol, liss_background) %>%
  reduce(full_join, by = "nomem_encr") %>%
  arrange(nomem_encr)
The full data frame consists of 25306 observations and 7271 variables.
Export data
The final DF consists of 25306 observations with 7271 variables.
# Remove every object from the environment except the merged data `liss`.
rm(list = setdiff(ls(), "liss"))

# Write the merged data to the processed-data folder.
liss_export_path <- file.path(
  "data",
  "data-processed",
  "liss_merged",
  "liss_merged_raw.Rdata"
)
save(liss, file = liss_export_path)

# Drop the temporary path so only `liss` remains, as before.
rm(liss_export_path)
LS0tDQp0aXRsZTogIk1lcmdpbmcgTElTUyBkYXRhZmlsZXMiDQphdXRob3I6ICJUaGlqbWVuIEplcm9lbnNlIg0KZGF0ZTogIkxhc3QgY29tcGlsZWQgb24gYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiBUUlVFDQogICAgdG9jX2RlcHRoOiAzDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9mb2xkaW5nOiBzaG93DQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KA0KICBjYWNoZSA9IFRSVUUsDQogIG1lc3NhZ2UgPSBGQUxTRSwNCiAgd2FybmluZyA9IEZBTFNFLA0KICByZXN1bHRzID0gImFzaXMiLA0KICBmaWcuYWxpZ24gPSAiY2VudGVyIg0KKQ0KYGBgDQoNCiMjIEludHJvZHVjdGlvbg0KDQpHb2FscyBvZiB0aGlzIHNjcmlwdCBpcyB0byBtZXJnZSBhbGwgdGhlIHJlbGV2YW50IExJU1MgY29yZSBtb2R1bGUgZmlsZXMgaW50byBvbmUgZGF0YWZyYW1lLg0KDQojIyBQYWNrYWdlcw0KYGBge3IgcGFja2FnZXN9DQojbG9hZCBwYWNrYWdlcw0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocHVycnIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeSh0aWR5c2VsZWN0KQ0KbGlicmFyeShoYXZlbikNCmxpYnJhcnkobWFncml0dHIpDQpgYGANCg0KDQojIyBEYXRhIGltcG9ydCBvZiB0aGUgZGlmZmVyZW50IExJU1MgbW9kdWxlcw0KDQpgYGB7ciBpbml0aWFsaXppbmd9DQojY3JlYXRlIGRpcmVjdG9yeSBuYW1lDQpkaXIgPC0gZmlsZS5wYXRoKCJkYXRhIiwNCiAgICAgICAgICAiZGF0YS1wcm9jZXNzZWQiLA0KICAgICAgICAgICJsaXNzX21lcmdlZCIsDQogICAgICAgICAgIm1vZHVsZXMiKQ0KDQojY3JlYXRlIGRpcmVjdG9yeQ0KaWYoIWRpci5leGlzdHMoZGlyKSkgew0KICBkaXIuY3JlYXRlKGRpcikNCn0NCmBgYA0KDQoNCiMjIyBMSVNTIGxlaXN1cmUgYW5kIGludGVncmF0aW9uIG1vZHVsZQ0KDQpgYGB7ciBsZWlzdXJlIGFuZCBpbnRlZ3JhdGlvbn0NCiNpbXBvcnQgdGhlIGRpZmZlcmVudCBsaXNzIGZpbGVzIGZyb20gdGhlaXIgZGlyZWN0b3J5LiANCg0KI3NldCBkYXRhIG5hbWUNCmZpbGVfbmFtZSA8LSBmaWxlLnBhdGgoZGlyLCJsZWlzdXJlX2ludGVncmF0aW9uX2NvbWJpbmVkLlJkYXRhIikNCg0KI2NvbmRpdGlvbmFsIG9uIGZpbGUuZXhpc3RzDQppZighZmlsZS5leGlzdHMoZmlsZV9uYW1lKSkgew0KbGlzc19maWxlcyA8LSBsaXN0LmZpbGVzKCJkYXRhL2RhdGEtcmF3L2xlaXN1cmVfaW50ZWdyYXRpb24vIiwgZnVsbC5uYW1lcyA9IFQpICU+JQ0KICBtYXAocmVhZF9zYXYpDQoNCiNGb3IgYSBmdWxsIG91dGVyIGpvaW4uDQpsaXNzX21lcmdlZCA8LSBsaXNzX2ZpbGVzICU+JQ0KICByZWR1Y2UoZnVsbF9qb2luLCBieSA9ICdub21lbV9lbmNyJykgJT4lDQogIGFycmFuZ2Uobm9tZW1fZW5jcikNCg0KI3NhdmUgbWVyZ2VkIGRhdGEN
CnNhdmUobGlzc19tZXJnZWQsDQogICAgIGZpbGUgPSBmaWxlX25hbWUpDQp9IGVsc2V7DQogIGxvYWQoZmlsZV9uYW1lKQ0KfQ0KDQpgYGANCg0KVGhpcyByZXN1bHRzIGluIGEgZGYgd2l0aCAxNDQ3MyBvYnNlcnZhdGlvbnMgYW5kIDQ5MDkgdmFyaWFibGVzLiANCg0KIyMjIFBvbGl0aWNzIGFuZCB2YWx1ZXMgbW9kdWxlDQoNCmBgYHtyIGltcG9ydCBwb2x2YWwgbW9kdWxlIH0NCiNzZXQgZGF0YSBuYW1lDQpmaWxlX25hbWUgPC0gZmlsZS5wYXRoKGRpciwgInBvbHZhbF9jb21iaW5lZC5SZGF0YSIpDQoNCiNjb25kaXRpb25hbCBvbiBmaWxlLmV4aXN0cw0KaWYgKCFmaWxlLmV4aXN0cyhmaWxlX25hbWUpKSB7DQogICNpbXBvcnQgdGhlIGRhdGEgZmlsZXMuDQogIGxpc3NfZmlsZXMgPC0NCiAgICBsaXN0LmZpbGVzKCJkYXRhL2RhdGEtcmF3L3BvbF92YWx1ZXNfZGF0YS8iLCBmdWxsLm5hbWVzID0gVCkgJT4lDQogICAgbWFwKHJlYWRfc2F2KQ0KICANCiAgI21lcmdlIHRoZSBkYXRhIGludG8gb25lIGZpbGUuDQogIGxpc3NfcG9sIDwtIGxpc3NfZmlsZXMgJT4lDQogICAgcmVkdWNlKGZ1bGxfam9pbiwgYnkgPSAnbm9tZW1fZW5jcicpICU+JQ0KICAgICNvcmRlciBvbiBub21lbV9lbmNyDQogICAgYXJyYW5nZShub21lbV9lbmNyKQ0KICANCiAgI3NhdmUgZGF0YQ0KICBzYXZlKGxpc3NfcG9sLA0KICAgICAgIGZpbGUgPSBmaWxlX25hbWUpDQp9IGVsc2V7DQogIGxvYWQoZmlsZV9uYW1lKQ0KfQ0KYGBgDQoNClRoaXMgcmVzdWx0cyBpbiBhIGRmIHdpdGggMTQwNDAgb2JzZXJ2YXRpb25zIG9mIDE5NzggdmFyaWFibGVzLiANCg0KYGBge3IgcmVtb3ZlIGhoIGlkc30NCiNyZW1vdmUgdGhlIGhvdXNlaG9sZCBpZCB2YXJpYWJsZXMgZnJvbSB0aGUgbGlzc19wb2wgYW5kIGxpc3NfbWVyZ2VkIGZpbGUuDQojIHRoZXkgYXJlIGluY29tcGxldGUgYXMgaW4gc29tZSB3YXZlcyB0aGV5IGFyZSBub3QgcHJlc2VudC4gSSB3aWxsIGFkZCB0aGVtIGxhdGVyIHdpdGggdGhlIGJhY2tncm91bmQgZGF0YS4gDQpsaXNzX3BvbFssYygibm9ob3VzZV9lbmNyIiwgIm5vaG91c2VfZW5jci54IiwgIm5vaG91c2VfZW5jci55IiwgIm5vaG91c2VfZW5jci54LngiLCAibm9ob3VzZV9lbmNyLnkueSIpXSA8LSBOVUxMDQpsaXNzX21lcmdlZFssYygibm9ob3VzZV9lbmNyIiwgIm5vaG91c2VfZW5jci54IiwgIm5vaG91c2VfZW5jci55IiwgIm5vaG91c2VfZW5jci54LngiLCAibm9ob3VzZV9lbmNyLnkueSIpXSA8LSBOVUxMDQoNCmBgYA0KDQojIyMgTElTUyBDRE4gYWx0ZXIgaWQNCg0KYGBge3IgY2RuIGFsdGVyIGlkc30NCiNzZXQgZGF0YSBuYW1lDQpmaWxlX25hbWUgPC0gZmlsZS5wYXRoKGRpciwgImFsdGVyaWRzX2NvbWJpbmVkLlJkYXRhIikNCg0KI2NvbmRpdGlvbmFsIG9uIGZpbGUuZXhpc3RzDQppZiAoIWZpbGUuZXhpc3RzKGZpbGVfbmFtZSkpIHsNCiNpbXBvcnQgdGhlIGRhdGEgZmlsZXMuDQpsaXNzX2ZpbGVzIDwtIGxpc3QuZmlsZXMoImRh
dGEvZGF0YS1yYXcvYWx0ZXJfaWRzLyIsIGZ1bGwubmFtZXMgPSBUKSAlPiUNCiAgbWFwKHJlYWRfc2F2KQ0KDQojanVzdCB0byBiZSBzdXJlLCBvcmRlciB0aGUgZmlsZXMgb24gbm9tZW1fZW5jciBhbmQgYWRkIHN1ZmZpeGVzIHRvIHRoZSBkYXRhLg0KI1RoZW4gd2UgY2FuIGNvcnJlY3RseSBpZGVudGlmeSB0aGUgZGlmZmVyZW50IHdhdmVzLiANCmxpc3NfZmlsZXMgPC0gbGFwcGx5KDE6MTEsIGZ1bmN0aW9uKGkpIHsNCiAgZGF0YSA8LSBsaXNzX2ZpbGVzW1tpXV0NCiAgI05vdyBJIGFkZCBzdWZmaXhlcyB0byBhbGwgdGhlIHZhcmlhYmxlcyBleGNlcHQgdGhlIElEIHZhciwgc28gd2UgY2FuIGNvcnJlY3RseSBJZGVudGlmeSB0aGUgd2F2ZSB0byB3aGljaCBhIHZhcmlhYmxlIGJlbG9uZ3MgaW4gdGhlIG1lcmdlZCBkYXRhLiBPdGhlcndpc2Ugd2Ugd2lsbCBoYXZlIDExIHNldHMgb2YgdGhlIHNhbWUgdmFyaWFibGVzIHdpdGggYSByYW5kb20gLnggb3IgLnkuIA0KICBuYW1lcyhkYXRhKVstMV0gPC0gcGFzdGUwKG5hbWVzKGRhdGEpWy0xXSwgc2VwID0gIi4iLCBjKDE6MTEpW2ldKQ0KICByZXR1cm4oZGF0YSkNCn0pDQoNCiNtZXJnZSB0aGUgZGF0YQ0KbGlzc19hbHRlciA8LSBsaXNzX2ZpbGVzICU+JQ0KICByZWR1Y2UoZnVsbF9qb2luLCBieSA9ICdub21lbV9lbmNyJykgJT4lDQogIGFycmFuZ2Uobm9tZW1fZW5jcikNCg0KI3NhdmUgZGF0YQ0Kc2F2ZShsaXNzX2FsdGVyLCANCiAgICAgZmlsZSA9IGZpbGVfbmFtZSkNCn0gZWxzZXsNCiAgbG9hZChmaWxlX25hbWUpDQp9DQpgYGANCg0KIyMjIExJU1MgYmFja2dyb3VuZA0KDQpgYGB7ciBsaXNzIGJhY2tncm91bmR9DQojc2V0IGRhdGEgbmFtZQ0KZmlsZV9uYW1lIDwtIGZpbGUucGF0aChkaXIsICJiYWNrZ3JvdW5kX2NvbWJpbmVkLlJkYXRhIikNCg0KI2NvbmRpdGlvbmFsIG9uIGZpbGUuZXhpc3RzDQppZiAoIWZpbGUuZXhpc3RzKGZpbGVfbmFtZSkpIHsNCiNpbXBvcnQgdGhlIGJhY2tncm91bmQgdmFyaWFibGUgZGF0YS4gDQojaW1wb3J0IGRhdGEgZmlsZXMuDQpsaXNzX2ZpbGVzIDwtIGxpc3QuZmlsZXMoImRhdGEvZGF0YS1yYXcvYmFja2dyb3VuZC8iLCBmdWxsLm5hbWVzID0gVCkgJT4lDQogIG1hcChyZWFkX3NhdikNCg0KI2p1c3QgdG8gYmUgc3VyZSwgb3JkZXIgdGhlIGZpbGVzIG9uIG5vbWVtX2VuY3IgYW5kIGFkZCBzdWZmaXhlcyB0byB0aGUgZGF0YSANCmxpc3NfZmlsZXMgPC0gbGFwcGx5KDE6MTEsIGZ1bmN0aW9uKGkpIHsNCiAgZGF0YSA8LSBsaXNzX2ZpbGVzW1tpXV1bYmFzZTo6b3JkZXIobGlzc19maWxlc1tbaV1dJG5vbWVtX2VuY3IpLF0NCiAgbmFtZXMoZGF0YSlbLTFdIDwtIHBhc3RlMChuYW1lcyhkYXRhKVstMV0sIHNlcCA9ICIuIiwgYygxOjExKVtpXSkNCiAgcmV0dXJuKGRhdGEpDQp9KQ0KDQojbWVyZ2UgdGhlIGRhdGEuIA0KbGlzc19iYWNrZ3JvdW5kIDwtIGxpc3NfZmlsZXMgJT4lDQogIHJlZHVjZShmdWxsX2pvaW4sIGJ5ID0gJ25vbWVt
X2VuY3InKSAlPiUNCiAgYXJyYW5nZShub21lbV9lbmNyKQ0KDQojc2F2ZSBkYXRhDQpzYXZlKGxpc3NfYmFja2dyb3VuZCwNCiAgICAgZmlsZSA9IGZpbGVfbmFtZSkNCn0gZWxzZSgNCiAgbG9hZChmaWxlX25hbWUpDQopDQpgYGANCg0KIyMjIE1lcmdlIGFsbCBmaWxlcw0KYGBge3IgY29tYmluZSBsaXNzIG1vZHVsZXMgfQ0KI21lcmdlIGFsbCB0aGUgZGF0YSB3aXRoIGEgZnVsbCBvdXRlciBqb2luIGludG8gb25lIGZpbGUuIA0KI29rYXksIGxldCdzIG1lcmdlIHRoZSBsaXNzX21lcmdlZCwgbGlzc19hbHRlciBhbmQgdGhlIHBvbGl0aWNzIGFuZCB2YWx1ZXMgd2F2ZXMuDQpsaXNzIDwtIGxpc3NfbWVyZ2VkICU+JQ0KICBmdWxsX2pvaW4obGlzc19hbHRlciwgYnkgPSAibm9tZW1fZW5jciIpICU+JQ0KICBmdWxsX2pvaW4obGlzc19wb2wsIGJ5ID0gIm5vbWVtX2VuY3IiKSAlPiUNCiAgZnVsbF9qb2luKGxpc3NfYmFja2dyb3VuZCwgYnkgPSAibm9tZW1fZW5jciIpICU+JQ0KICBhcnJhbmdlKG5vbWVtX2VuY3IpDQpgYGANCg0KRnVsbCBkYXRhZnJhbWUgY29uc2lzdHMgb2YgMjUzMDYgb2JzZXJ2YXRpb25zIGFuZCA3MjcxIHZhcmlhYmxlcw0KDQojIyBFeHBvcnQgZGF0YQ0KVGhlIGZpbmFsIERGIGNvbnNpc3RzIG9mIDI1MzA2IG9ic2VydmF0aW9ucyB3aXRoIDcyNzEgdmFyaWFibGVzLg0KYGBge3IgY2xlYW4gZW52aXJvbmVtbnQgYW5kIGV4cG9ydCBkYXRhfQ0KI2NsZWFuIHRoZSBnbG9iYWwgZW52aXJvbm1lbnQuDQpybShsaXN0PWxzKClbISBscygpICVpbiUgYygibGlzcyIpXSkNCmBgYA0KDQpgYGB7ciBleHBvcnR9DQpzYXZlKGxpc3MsIA0KICAgICBmaWxlID0gZmlsZS5wYXRoKCJkYXRhIiwNCiAgICAgICAgICAiZGF0YS1wcm9jZXNzZWQiLA0KICAgICAgICAgICJsaXNzX21lcmdlZCIsDQogICAgICAgImxpc3NfbWVyZ2VkX3Jhdy5SZGF0YSIpKQ0KYGBgDQoNCg==