Data preparation script NELLS data

Libraries

# Attach every package named in `packages`, installing a package first when it
# cannot be attached. (c) Jochem Tolsma
#
# packages: character vector of package names.
# Returns the list built by lapply(), one element per requested package.
fpackage.check <- function(packages) {
  attach_or_install <- function(pkg) {
    # require() returns TRUE when the package could be attached: nothing to do.
    if (require(pkg, character.only = TRUE)) {
      return(NULL)
    }
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }
  lapply(packages, FUN = attach_or_install)
}
# Packages attached for this script: tidyverse and lubridate.
packages <- c("tidyverse", "lubridate")
fpackage.check(packages)

NELLS combined import

Import the NELLS data file with the selected neighbourhood variables.

# Restore the objects stored in the combined NELLS file into the calling
# environment.
# NOTE(review): load() reads files written by save(), so despite the .rds
# extension this is presumably not a saveRDS() file; the `nells_nsum` object
# used further down is presumably created by this call - confirm.
load("data_analysis/data/data_processed/nells_data/2023-05-08_nells_combined.rds")

Custom functions

All the steps in the data preparation are saved in custom functions. This section lists all the custom functions that we used in the data preparation alphabetically.

Age

# Derive year of birth and age from the date-of-birth column `V9`.
#
# x:              data frame with a column `V9` holding dates that are parsed
#                 as day-month-year; "/" and " " separators are accepted.
# reference_year: year from which the year of birth is subtracted to obtain
#                 `age` (defaults to 2022).
#
# Returns `x` with `year_birth` and `age` added and `V9` removed. `age` is a
# difference in calendar years, not an exact age in days. Dates that dmy()
# cannot parse give NA.
age_from_birth <- function(x, reference_year = 2022) {
  stopifnot("V9" %in% names(x))

  # replace " " and "/" with "-" in a single pass before parsing
  birth_dates <- str_replace_all(x[["V9"]], pattern = "[/ ]", replacement = "-")

  # extract year of birth from date of birth
  x$year_birth <- year(dmy(birth_dates))
  x$age <- reference_year - x$year_birth

  # the raw date of birth is removed from the data
  x %>%
    select(-V9)
}

Children

# Prepare the child-related items.
#
# x: data frame (e.g. nells_nsum) with the column range V62:V65 plus V66 and
#    Q230.
#
# Returns (invisibly) x with V62:V65 made numeric and renamed, two 0/1
# education indicators, a combined youngest-child age and a 0/1 flag for that
# age being below 13.
# NOTE(review): the flag is named `age_child_below12` but the cut-off in the
# code is `< 13` - confirm which is intended.
prepare_children <- function(x) {
  renamed <- x %>%
    mutate(across(.cols = V62:V65, .fns = as.numeric)) %>%
    rename(
      nr_children = V62,
      age_oldest_child = V63,
      age_youngest_child = V64,
      age_only_child = V65
    )

  result <- mutate(
    renamed,
    oldest_child_education = ifelse(V66 == 1, 1, 0),
    only_child_education = ifelse(Q230 == 1, 1, 0),
    # more than one child: age of the youngest; exactly one child: age of the
    # only child; otherwise NA
    age_youngest_child_combined = ifelse(
      nr_children > 1,
      age_youngest_child,
      ifelse(nr_children == 1, age_only_child, NA)
    ),
    age_child_below12 = ifelse(age_youngest_child_combined < 13, 1, 0)
  )

  invisible(result)
}

Education

# Recode the education items into labelled factors.
#
# x: data frame with columns V26, V27 and V28.
#
# Returns (invisibly) x with three added columns:
#   education_completed_highest  factor of V28 (codes 1-15)
#   in_education                 0 when V26 equals 3, otherwise 1
#   education_level_attend       factor of V27 (codes 1-12)
prepare_education <- function(x) {
  completed_level_labels <- c(
    "No education",
    "Primary education",
    "lbo, vmbo-kb",
    "mavo, vmbo-gl/tl",
    "havo",
    "vwo/gymnasium",
    "mbo-kort",
    "mbo-tussen/lang",
    "hbo",
    "university bachelor",
    "university master",
    "PhD",
    "foreign, primary education",
    "foreign, secundary education",
    "foreign, tertiary education"
  )
  attended_level_labels <- c(
    "Primary education",
    "lbo, vmbo-kb",
    "mavo, vmbo-gl/tl",
    "havo",
    "vwo/gymnasium",
    "mbo-kort",
    "mbo-tussen/lang",
    "hbo",
    "university bachelor",
    "university master",
    "PhD",
    "Foreign"
  )

  recoded <- mutate(
    x,
    education_completed_highest = factor(
      V28,
      levels = 1:15,
      labels = completed_level_labels
    ),
    in_education = ifelse(V26 == 3, 0, 1),
    education_level_attend = factor(
      V27,
      levels = 1:12,
      labels = attended_level_labels
    )
  )

  invisible(recoded)
}

Gender

# Gender recode: turn V10 (codes 1-3) into the factor `gender`.
#
# x: data frame with column V10.
# Returns (invisibly) x with the added factor column `gender`; codes outside
# 1-3 become NA.
prepare_gender <- function(x) {
  gender_labels <- c("Male", "Female", "Other")
  recoded <- mutate(
    x,
    gender = factor(V10, levels = 1:3, labels = gender_labels)
  )
  invisible(recoded)
}

Migration age

# Add `migration_age`, the numeric version of V18.
#
# x: data frame with column V18.
# Returns (invisibly) x with the added column `migration_age`.
prepare_migration_age <- function(x) {
  invisible(mutate(x, migration_age = as.numeric(V18)))
}

Migration background

# Derive the respondent's migration background from the country of birth of
# the respondent (V13), the mother (V14) and the father (V15).
#
# x: data frame with columns V13, V14 and V15.
#
# Returns x with V13-V15 converted to numeric and these columns added:
#   country_birth, country_birth_mother, country_birth_father
#       country collapsed to 1-4 (130 -> 2, 220 -> 3, anything above 3 -> 4)
#   country_birth_fac, country_birth_mother_fac, country_birth_father_fac
#       the same as factors: Dutch, Moroccan, Turkish, Other
#   migration_background      numeric code 1-6 (NA when no rule applies)
#   migration_background_fac  the same as a labelled factor
#
# NOTE(review): raw country codes 2 and 3, if they occur in the data, are
# indistinguishable from the recoded Moroccan/Turkish codes - confirm against
# the codebook.
prepare_migration <- function(x) {
  country_labels <- c("Dutch", "Moroccan", "Turkish", "Other")

  # Collapse a raw country code into 1-4.
  collapse_country <- function(code) {
    code <- ifelse(code == 130, 2, code)
    code <- ifelse(code == 220, 3, code)
    ifelse(code > 3, 4, code)
  }

  # Turn a collapsed 1-4 code into a labelled factor.
  as_country_factor <- function(code) {
    factor(code, levels = 1:4, labels = country_labels)
  }

  # Apply the classification rules from first to last; a later rule overrides
  # an earlier one. Rule i assigns `code[i]` when the person was born in
  # `own_is[i]` and at least one parent was born in `parent_is[i]`.
  classify_background <- function(own, mother, father) {
    own_is <- c(
      "Dutch", "Dutch", "Other", "Dutch", "Dutch", "Turkish",
      "Moroccan", "Other", "Other", "Turkish", "Moroccan"
    )
    parent_is <- c(
      "Dutch", "Other", "Other", "Moroccan", "Turkish", "Turkish",
      "Moroccan", "Moroccan", "Turkish", "Other", "Other"
    )
    code <- c(1, 1, 6, 2, 3, 4, 5, 2, 3, 4, 5)

    background <- NA
    for (i in seq_along(code)) {
      # The parentheses are required: `&` binds tighter than `|`, so without
      # them a matching father alone would satisfy the rule regardless of the
      # person's own country of birth.
      hit <- own == own_is[i] & (mother == parent_is[i] | father == parent_is[i])
      # NOTE(review): ifelse() returns NA where `hit` is NA, so a missing
      # parental country can reset an earlier classification to NA - confirm
      # that this is intended.
      background <- ifelse(hit, code[i], background)
    }
    background
  }

  x %>%
    mutate(
      V13 = as.numeric(V13),
      V14 = as.numeric(V14),
      V15 = as.numeric(V15),
      country_birth = collapse_country(V13),
      country_birth_mother = collapse_country(V14),
      country_birth_father = collapse_country(V15),
      country_birth_fac = as_country_factor(country_birth),
      country_birth_mother_fac = as_country_factor(country_birth_mother),
      country_birth_father_fac = as_country_factor(country_birth_father),
      migration_background = classify_background(
        country_birth_fac,
        country_birth_mother_fac,
        country_birth_father_fac
      ),
      migration_background_fac = factor(
        migration_background,
        levels = 1:6,
        labels = c(
          "Dutch",
          "2nd gen Moroccan",
          "2nd gen Turkish",
          "1st gen Turkish",
          "1st gen Moroccan",
          "Other"
        )
      )
    )
}

Migration motive

# Recode the migration-motive items into labelled factors and rename them.
#
# x: data frame whose columns starting with "V21" hold codes 1-3; V21_1 to
#    V21_4 must be present.
#
# Returns (invisibly) x with every "V21*" column turned into a factor and
# V21_1-V21_4 renamed to mig_motive_*.
# NOTE(review): the labels "Yes,strongly" / "Yes, somewhwat" and the column
# name `mig_motive_familiy` contain typos; they are kept because other code
# may rely on the exact strings - confirm before correcting.
prepare_migration_motive <- function(x) {
  motive_labels <- c("Yes,strongly", "Yes, somewhwat", "None")
  to_motive_factor <- function(item) {
    factor(item, levels = 1:3, labels = motive_labels)
  }

  as_factors <- mutate(x, across(starts_with("V21"), to_motive_factor))
  renamed <- rename(
    as_factors,
    mig_motive_work_income = V21_1,
    mig_motive_study = V21_2,
    mig_motive_politics = V21_3,
    mig_motive_familiy = V21_4
  )

  invisible(renamed)
}

NSUM variable prep

# Rename NSUM variables: give the V122_* and V123_* items descriptive
# knows_* names.
#
# x: data frame containing V122_1-V122_12 and V123_1-V123_11.
# Returns (invisibly) x with those columns renamed; column order is unchanged.
rename_nells_variables <- function(x) {
  # new name = old name
  knows_lookup <- c(
    knows_daan = "V122_1",
    knows_kevin = "V122_2",
    knows_edwin = "V122_3",
    knows_albert = "V122_4",
    knows_emma = "V122_5",
    knows_linda = "V122_6",
    knows_ingrid = "V122_7",
    knows_willemina = "V122_8",
    knows_mohammed = "V122_9",
    knows_fatima = "V122_10",
    knows_esra = "V122_11",
    knows_ibrahim = "V122_12",
    knows_prison = "V123_1",
    knows_mbo = "V123_2",
    knows_hbo = "V123_3",
    knows_university = "V123_4",
    knows_secundary = "V123_5",
    knows_unemployed = "V123_6",
    knows_secondhome = "V123_7",
    knows_turkishmigration = "V123_8",
    knows_moroccanmigration = "V123_9",
    knows_hoofddoek = "V123_10",
    knows_ramadan = "V123_11"
  )

  invisible(rename(x, all_of(knows_lookup)))
}

# Midpoint recode function: map the codes 1-7 to the values
# 0, 1, 3, 8, 15, 35 and 50. Any other value, including NA, becomes NA.
#
# x: vector of codes.
# Returns a numeric vector of the same length as x.
midpoint_recode <- function(x) {
  midpoints <- c(0, 1, 3, 8, 15, 35, 50)
  # match() gives the position of each code in 1-7, or NA when there is none
  midpoints[match(x, seq_along(midpoints))]
}

# NSUM midpoint recode function.
#
# Replaces the codes of the V122* and V123* items by the values returned by
# midpoint_recode() and adds "boundary" versions in which every value above 8
# is replaced by 11.
#
# x: data frame containing V122_1-V122_12 and V123_1.
#
# Returns x with
#   - all columns starting with "V122" or "V123" recoded in place,
#   - knows_prison_boundary (from V123_1),
#   - knows_<name>_boundary for V122_1-V122_12, appended in that order.
#
# Only the V122/V123 items are recoded; other columns whose name merely starts
# with "V12" are left untouched. The boundary columns are computed row by row
# on x itself, so the number of rows cannot change when `id` has duplicated or
# missing values.
prepare_nsum <- function(x) {
  # values above 8 are replaced by 11
  cap_above_eight <- function(v) {
    ifelse(v > 8, 11, v)
  }

  x %>%
    mutate(across(starts_with(c("V122", "V123")), midpoint_recode)) %>%
    mutate(knows_prison_boundary = cap_above_eight(V123_1)) %>%
    mutate(across(
      starts_with("V122"),
      cap_above_eight,
      .names = "{.col}_boundary"
    )) %>%
    rename(
      knows_daan_boundary = V122_1_boundary,
      knows_kevin_boundary = V122_2_boundary,
      knows_edwin_boundary = V122_3_boundary,
      knows_albert_boundary = V122_4_boundary,
      knows_emma_boundary = V122_5_boundary,
      knows_linda_boundary = V122_6_boundary,
      knows_ingrid_boundary = V122_7_boundary,
      knows_willemina_boundary = V122_8_boundary,
      knows_mohammed_boundary = V122_9_boundary,
      knows_fatima_boundary = V122_10_boundary,
      knows_esra_boundary = V122_11_boundary,
      knows_ibrahim_boundary = V122_12_boundary
    )
}

Partner status

# Recode partner status from V53.
#
# x: data frame with column V53 (codes 1-4).
#
# Returns (invisibly) x with
#   partner_extended  factor with the four partner-status labels
#   partner           0 when V53 equals 1, otherwise 1
prepare_partner <- function(x) {
  status_labels <- c(
    "Does not have a partner",
    "Has a partner, lives seperately",
    "Lives together, unmarried",
    "Married"
  )

  with_status <- mutate(
    x,
    partner_extended = factor(V53, levels = 1:4, labels = status_labels)
  )

  invisible(mutate(with_status, partner = if_else(V53 == 1, 0, 1)))
}

Outgroup attitudes

# Convert the Q225* items to numeric and rename three of them to therm_*.
#
# x: data frame whose columns starting with "Q225" include Q225_24, Q225_25
#    and Q225_26.
# Returns (invisibly) x with every "Q225*" column made numeric and the three
# named items renamed to therm_dutch_maj, therm_mor and therm_tur.
prepare_therm <- function(x) {
  numeric_items <- mutate(
    x,
    across(.cols = starts_with("Q225"), .fns = as.numeric)
  )

  invisible(rename(
    numeric_items,
    therm_dutch_maj = Q225_24,
    therm_mor = Q225_25,
    therm_tur = Q225_26
  ))
}

Partner migration background

# Derive the partner's migration background from the country of birth of the
# partner (V55), the partner's mother (V57) and the partner's father (V56).
#
# x: data frame with columns V55, V56 and V57.
#
# Returns x with V55-V57 converted to numeric and these columns added:
#   country_birth_partner, country_birth_mother_partner,
#   country_birth_father_partner
#       country collapsed to 1-4 (130 -> 2, 220 -> 3, anything above 3 -> 4)
#   country_birth_partner_fac, country_birth_mother_partner_fac,
#   country_birth_father_partner_fac
#       the same as factors: Dutch, Moroccan, Turkish, Other
#   migration_background_partner      numeric code 1-6 (NA when no rule applies)
#   migration_background_partner_fac  the same as a labelled factor
#
# NOTE(review): the mother is read from V57 and the father from V56, i.e. not
# in ascending variable order - confirm against the codebook.
# NOTE(review): raw country codes 2 and 3, if they occur in the data, are
# indistinguishable from the recoded Moroccan/Turkish codes - confirm.
prepare_migration_partner <- function(x) {
  country_labels <- c("Dutch", "Moroccan", "Turkish", "Other")

  # Collapse a raw country code into 1-4.
  collapse_country <- function(code) {
    code <- ifelse(code == 130, 2, code)
    code <- ifelse(code == 220, 3, code)
    ifelse(code > 3, 4, code)
  }

  # Turn a collapsed 1-4 code into a labelled factor.
  as_country_factor <- function(code) {
    factor(code, levels = 1:4, labels = country_labels)
  }

  # Apply the classification rules from first to last; a later rule overrides
  # an earlier one. Rule i assigns `code[i]` when the person was born in
  # `own_is[i]` and at least one parent was born in `parent_is[i]`.
  classify_background <- function(own, mother, father) {
    own_is <- c(
      "Dutch", "Dutch", "Other", "Dutch", "Dutch", "Turkish",
      "Moroccan", "Other", "Other", "Turkish", "Moroccan"
    )
    parent_is <- c(
      "Dutch", "Other", "Other", "Moroccan", "Turkish", "Turkish",
      "Moroccan", "Moroccan", "Turkish", "Other", "Other"
    )
    code <- c(1, 1, 6, 2, 3, 4, 5, 2, 3, 4, 5)

    background <- NA
    for (i in seq_along(code)) {
      # The parentheses are required: `&` binds tighter than `|`, so without
      # them a matching father alone would satisfy the rule regardless of the
      # person's own country of birth.
      hit <- own == own_is[i] & (mother == parent_is[i] | father == parent_is[i])
      # NOTE(review): ifelse() returns NA where `hit` is NA, so a missing
      # parental country can reset an earlier classification to NA - confirm
      # that this is intended.
      background <- ifelse(hit, code[i], background)
    }
    background
  }

  x %>%
    mutate(
      V55 = as.numeric(V55),
      V56 = as.numeric(V56),
      V57 = as.numeric(V57),
      country_birth_partner = collapse_country(V55),
      country_birth_mother_partner = collapse_country(V57),
      country_birth_father_partner = collapse_country(V56),
      country_birth_partner_fac = as_country_factor(country_birth_partner),
      country_birth_mother_partner_fac = as_country_factor(
        country_birth_mother_partner
      ),
      country_birth_father_partner_fac = as_country_factor(
        country_birth_father_partner
      ),
      migration_background_partner = classify_background(
        country_birth_partner_fac,
        country_birth_mother_partner_fac,
        country_birth_father_partner_fac
      ),
      migration_background_partner_fac = factor(
        migration_background_partner,
        levels = 1:6,
        labels = c(
          "Dutch",
          "2nd gen Moroccan",
          "2nd gen Turkish",
          "1st gen Turkish",
          "1st gen Moroccan",
          "Other"
        )
      )
    )
}

Religion

# Recode religious denomination and attendance into labelled factors.
#
# x: data frame (e.g. nells_nsum) with columns V89, V90 and V91.
#
# Returns (invisibly) x with
#   religious_denom  factor with 13 levels; code 13 ("None") is used when V89
#                    equals 2, otherwise the code comes from V90
#   rel_attendance   factor of V91 (codes 1-7)
religion_prep <- function(x) {
  denomination_labels <- c(
    "Catholic",
    "PKN (hervormd)",
    "PKN (reformed)",
    "PKN (luthers)",
    "Protestant other",
    "Islam sunni",
    "Islam shia",
    "Islam other",
    "Judaism",
    "Hindu",
    "Budhism",
    "Other",
    "None"
  )
  attendance_labels <- c(
    "Never",
    "1-2 a year",
    "3-11 a year",
    "Once a month",
    "2-3 a month",
    "Every week",
    "Multiple times a week"
  )

  recoded <- mutate(
    x,
    religious_denom = factor(
      ifelse(as.numeric(V89) == 2, 13, as.numeric(V90)),
      levels = 1:13,
      labels = denomination_labels
    ),
    rel_attendance = factor(
      as.numeric(V91),
      levels = 1:7,
      labels = attendance_labels
    )
  )

  invisible(recoded)
}

Data preparation

Prepare the data with the custom functions.

Independent and control variables

# Run the preparation steps for the independent and control variables in
# order; every step receives the data frame and returns the extended version.
# NOTE(review): prepare_paid_work() is not defined in the part of the script
# shown here - confirm that it is defined or sourced elsewhere.
nells_nsum <- nells_nsum %>%
  age_from_birth() %>%             # AGE: edit date into year
  prepare_gender() %>%             # GENDER
  prepare_children() %>%           # CHILDREN
  prepare_education() %>%          # EDUCATION
  prepare_migration() %>%          # MIGRATION BACKGROUND
  prepare_migration_age() %>%
  prepare_migration_motive() %>%
  prepare_migration_partner() %>%  # MIGRATION BACKGROUND PARTNER
  prepare_paid_work() %>%          # PAID WORK
  prepare_partner() %>%            # PARTNER
  religion_prep() %>%              # RELIGION
  prepare_therm()                  # THERMOMETER

##########################

NSUM variables

# Data preparation NSUM variables: recode the items first, then give the raw
# item columns their knows_* names.
nells_nsum <- nells_nsum %>%
  prepare_nsum() %>%
  rename_nells_variables()

Export data

Select only the variables that we need for the analyses. Save the data as a separate .rds file.

# Keep only the columns needed for the analyses, in this order.
nells_nsum <- select(
  nells_nsum,
  # identifiers
  id,
  mun_code,
  pc_code,
  # age, gender and partner
  age,
  gender,
  partner_extended,
  partner,
  # children
  nr_children,
  age_oldest_child,
  age_youngest_child,
  age_youngest_child_combined,
  age_child_below12,
  age_only_child,
  oldest_child_education,
  only_child_education,
  # education and work
  in_education,
  education_level_attend,
  education_completed_highest,
  paid_work,
  # religion
  religious_denom,
  rel_attendance,
  # migration
  migration_age,
  starts_with("mig_motive"),
  contains("country_birth"),
  migration_background,
  migration_background_fac,
  migration_background_partner,
  migration_background_partner_fac,
  # thermometer and NSUM items
  starts_with("therm"),
  contains("knows_"),
  # columns ending in "gem" / "pc4"
  ends_with("gem"),
  ends_with("pc4"),
  mean_woz_gem,
  mean_p_koop_gem
)

# NOTE(review): save() writes a file that must be read back with load(), not
# readRDS(), despite the .rds extension. The file name is dated 2022-05-08
# while the input file is dated 2023-05-08 - confirm the intended date.
save(
  list = "nells_nsum",
  file = "data_analysis/data/data_processed/nells_data/2022-05-08_nells_nsum_data.rds"
)



Copyright © 2024 Jeroense Thijmen