Goal

Clean alter data and remove faulty alter data.

Set up

# attach the tidyverse; the pipeline below uses its select/rename/pivot_*/
# filter/mutate verbs and the %>% pipe
library("tidyverse")

# restore the objects stored in the merged LISS .Rdata file into the session
# (later chunks use liss_long and liss_wide, presumably supplied by this file)
load("data/data-processed/liss_merged/lisscdn_afterrel_V4_240624.Rdata")

Data prep

# Create the check data.
# Step 1: reshape the alter columns of liss_long into long format, giving one
# row per ego (nomem_encr) x survey_wave x alter slot, with the columns
# alter_id, gender_alter and rel_alter.
alter_long <- liss_long %>%
  select(nomem_encr,
         survey_wave,
         starts_with("alter_id"),
         starts_with("rel_alter"),
         starts_with("gender_alter")) %>%
  # bring every column into the same "<measure>.<slot>" shape so that a single
  # names_pattern can split measure and slot in pivot_longer()
  rename(gender_alter.1 = gender_alter1,
         gender_alter.2 = gender_alter2,
         gender_alter.3 = gender_alter3,
         gender_alter.4 = gender_alter4,
         gender_alter.5 = gender_alter5,
         rel_alter.1 = rel_alter1,
         rel_alter.2 = rel_alter2,
         rel_alter.3 = rel_alter3,
         rel_alter.4 = rel_alter4,
         rel_alter.5 = rel_alter5,
         alter_id.1 = alter_id_1,
         alter_id.2 = alter_id_2,
         alter_id.3 = alter_id_3,
         alter_id.4 = alter_id_4,
         alter_id.5 = alter_id_5) %>%
  pivot_longer(cols = c(starts_with("alter_id"),
                        starts_with("gender"),
                        starts_with("rel")),
               names_to = c("measure", "alter"),
               values_to = "value",
               names_pattern = "(.+)\\.(.+)") %>%
  pivot_wider(names_from = measure,
              values_from = value)

# Step 2: drop empty alter slots (NA alter_id) and build an ego-alter (dyad)
# key. The "_" separator keeps the key unambiguous: pasting the two ids
# directly could map different (nomem_encr, alter_id) pairs onto the same
# string.
check_data <- alter_long %>%
  filter(!is.na(alter_id)) %>%
  mutate(dyad_id = paste0(nomem_encr, "_", alter_id)) %>%
  # select by name instead of by position, so the result does not depend on
  # the column order produced by pivot_wider()
  select(nomem_encr, dyad_id, alter_id, survey_wave, alter,
         gender_alter, rel_alter) %>%
  arrange(dyad_id, survey_wave)

Alter Checks

Gender check

Check whether an alter switched gender between waves. Although this is possible, it is highly unlikely.

# Mean of the gender code per dyad over all waves. The filter below treats a
# mean of exactly 1 or exactly 2 as consistent; any other value means the
# reported gender differs between waves.
# NOTE(review): mean() is called without na.rm, so a dyad with a missing
# gender in any wave gets an NA mean; filter() drops NA conditions, so such
# dyads are never flagged. Confirm that this is intended.
check_data_gender <- check_data %>%
  arrange(nomem_encr, dyad_id, survey_wave) %>%
  group_by(dyad_id) %>%
  mutate(mean_gender = mean(gender_alter)) %>%
  ungroup()

# we lose 461 respondents due to gender transitions
# pull() returns the ego ids as a plain vector. As a one-column tibble, the
# later `nomem_encr %in% gender_fault_ids` filters would compare each id with
# the whole column instead of its elements and would not remove these egos.
gender_fault_ids <- check_data_gender %>%
  filter(mean_gender != 2 & mean_gender != 1) %>%
  distinct(nomem_encr) %>%
  pull(nomem_encr)

Relationship check

Check whether alters have weird or impossible relationship transformations.

# Build, per dyad, a transition key "<current rel_alter><previous rel_alter>"
# with paste0(), so impossible transitions can be filtered by simple lookup.
# Rows with a missing rel_alter are removed first, so "previous" means the
# previous wave in which the relationship was reported. The first observation
# of a dyad has no predecessor and gets a key ending in "NA", which is not in
# the lookup table below.
check_data <- check_data %>%
  select(nomem_encr, dyad_id, survey_wave, rel_alter) %>%
  filter(!is.na(rel_alter)) %>%
  # lag() works on the current row order; sort here so the check does not
  # silently depend on the ordering applied when check_data was created
  arrange(dyad_id, survey_wave) %>%
  group_by(dyad_id) %>%
  mutate(rel_check = paste0(rel_alter, lag(rel_alter))) %>% # transition key
  ungroup()

# set impossible combinations. See codebook for the meaning of these.
# Stored as character so the lookup compares like with like: rel_check is a
# character key, e.g. "110" is current code 1 preceded by code 10.
impossible_combinations <- as.character(c(
  12, 13, 14, 15, 16, 17, 18, 110,
  21, 23, 24, 25, 26, 27, 28, 29, 210,
  31, 32, 34, 35, 36, 37, 38, 39, 310,
  41, 42, 43, 45, 46, 47, 48, 49, 410,
  51, 52, 53, 54, 56, 57, 58, 59, 510,
  62, 63, 64, 65,
  72, 73, 74, 75,
  82, 83, 84, 85,
  92, 93, 94, 95,
  102, 103, 104, 105
))

# we lose 1000 ego over impossible combinations
# collect the egos (nomem_encr) that have at least one impossible transition
faulty_dyads_ids <- check_data %>%
  filter(rel_check %in% impossible_combinations) %>%
  distinct(nomem_encr) %>%
  pull(nomem_encr)

Clean data

Filter out networks with impossible gender or relationship transformations.

# Remove every ego whose network contains an impossible relationship
# transition (faulty_dyads_ids) or an alter with inconsistent gender
# (gender_fault_ids), from both the long and the wide file.
#
# gender_fault_ids may be a one-column tibble instead of a vector. `%in%`
# against a data frame compares each id with whole columns, so no ego would be
# removed. unlist() flattens it to the underlying id vector and leaves a plain
# vector unchanged.

# filter out the networks with impossible combinations
liss_long <- liss_long %>%
  filter(!nomem_encr %in% faulty_dyads_ids,                        # relationship faults
         !nomem_encr %in% unlist(gender_fault_ids, use.names = FALSE))  # gender faults

# filter out from wide file
liss_wide <- liss_wide %>%
  filter(!nomem_encr %in% faulty_dyads_ids,                        # relationship faults
         !nomem_encr %in% unlist(gender_fault_ids, use.names = FALSE))  # gender faults

Data export

# clean global environment.
# only keep the wide and long files.
rm(list = setdiff(ls(), c("liss_wide", "liss_long")))

# save data
# Save the two cleaned objects explicitly. save.image() would write the whole
# global workspace, including hidden objects that ls() does not list (such as
# .Random.seed), and these would then be restored by every later load().
save(liss_wide, liss_long,
     file = "data/data-processed/liss_merged/lisscdn_cleaned_V4_240624.Rdata")
LS0tDQp0aXRsZTogIkNsZWFuIGFsdGVyIGRhdGEiDQphdXRob3I6ICJUaGlqbWVuIEplcm9lbnNlIg0KZGF0ZTogIkxhc3QgY29tcGlsZWQgb24gYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiBUUlVFDQogICAgdG9jX2RlcHRoOiAzDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9mb2xkaW5nOiBzaG93DQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KA0KICBjYWNoZSA9IFRSVUUsDQogIG1lc3NhZ2UgPSBGQUxTRSwNCiAgd2FybmluZyA9IEZBTFNFLA0KICByZXN1bHRzID0gImFzaXMiLA0KICBmaWcuYWxpZ24gPSAiY2VudGVyIg0KKQ0KYGBgDQoNCiMgR29hbA0KDQpDbGVhbiBhbHRlciBkYXRhIGFuZCByZW1vdmUgZmF1bHR5IGFsdGVyIGRhdGEuIA0KDQojIFNldCB1cA0KDQpgYGB7ciBwYWNrYWdlcyBhbmQgZGF0YWltcG9ydH0NCiNwYWNrYWdlcw0KbGlicmFyeSh0aWR5dmVyc2UpDQoNCiNsb2FkDQpsb2FkKGZpbGUgPSAiZGF0YS9kYXRhLXByb2Nlc3NlZC9saXNzX21lcmdlZC9saXNzY2RuX2FmdGVycmVsX1Y0XzI0MDYyNC5SZGF0YSIpDQpgYGANCg0KIyBEYXRhIHByZXANCg0KYGBge3IgY3JlYXRlIGNoZWNrIGRhdGF9DQojY3JlYXRlIGNoZWNrIGRhdGENCiNmaXJzdCBjcmVhdGUgYSBsb25nIGZvcm1hdA0KYWx0ZXJfbG9uZyA8LSBsaXNzX2xvbmcgJT4lIA0KICBzZWxlY3Qobm9tZW1fZW5jciwNCiAgICAgICAgIHN1cnZleV93YXZlLA0KICAgICAgICAgc3RhcnRzX3dpdGgoImFsdGVyX2lkIiksDQogICAgICAgICBzdGFydHNfd2l0aCgicmVsX2FsdGVyIiksDQogICAgICAgICBzdGFydHNfd2l0aCgiZ2VuZGVyX2FsdGVyIikpICU+JSANCiAgcmVuYW1lKGdlbmRlcl9hbHRlci4xID0gZ2VuZGVyX2FsdGVyMSwNCiAgICAgICAgIGdlbmRlcl9hbHRlci4yID0gZ2VuZGVyX2FsdGVyMiwNCiAgICAgICAgIGdlbmRlcl9hbHRlci4zID0gZ2VuZGVyX2FsdGVyMywNCiAgICAgICAgIGdlbmRlcl9hbHRlci40ID0gZ2VuZGVyX2FsdGVyNCwNCiAgICAgICAgIGdlbmRlcl9hbHRlci41ID0gZ2VuZGVyX2FsdGVyNSwNCiAgICAgICAgIHJlbF9hbHRlci4xID0gcmVsX2FsdGVyMSwNCiAgICAgICAgIHJlbF9hbHRlci4yID0gcmVsX2FsdGVyMiwNCiAgICAgICAgIHJlbF9hbHRlci4zID0gcmVsX2FsdGVyMywNCiAgICAgICAgIHJlbF9hbHRlci40ID0gcmVsX2FsdGVyNCwNCiAgICAgICAgIHJlbF9hbHRlci41ID0gcmVsX2FsdGVyNSwNCiAgICAgICAgIGFsdGVyX2lkLjEgPSBhbHRlcl9pZF8xLA0KICAgICAgICAgYWx0ZXJfaWQuMiA9IGFsdGVyX2lkXzIsDQogICAgICAgICBhbHRlcl9pZC4zID0gYWx0ZXJfaWRfMywNCiAgICAgICAgIGFsdGVyX2lkLjQgPSBhbHRlcl9pZF80LA0KICAgICAgICAgYWx0ZXJfaWQuNSA9IGFsdGVyX2lkXzUp
ICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSBjKHN0YXJ0c193aXRoKCJhbHRlcl9pZCIpLA0KICAgICAgICAgICAgICAgICAgICAgICAgc3RhcnRzX3dpdGgoImdlbmRlciIpLA0KICAgICAgICAgICAgICAgICAgICAgICAgc3RhcnRzX3dpdGgoInJlbCIpKSwNCiAgICAgICAgICAgICAgIG5hbWVzX3RvID0gYygibWVhc3VyZSIsICJhbHRlciIpLA0KICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gInZhbHVlIiwNCiAgICAgICAgICAgICAgIG5hbWVzX3BhdHRlcm4gPSAiKC4rKVxcLiguKykiKSAlPiUgDQogIHBpdm90X3dpZGVyKG5hbWVzX2Zyb20gPSBtZWFzdXJlLA0KICAgICAgICAgICAgICB2YWx1ZXNfZnJvbSA9IHZhbHVlKSANCg0KI2NyZWF0ZSBkeWFkIGlkIGFuZCByZW1vdmUgbmEncyBvbiBhbHRlcl9pZA0KY2hlY2tfZGF0YSA8LSBhbHRlcl9sb25nICU+JSANCiAgZmlsdGVyKCFpcy5uYShhbHRlcl9pZCkpICU+JSANCiAgbXV0YXRlKGR5YWRfaWQgPSBwYXN0ZTAobm9tZW1fZW5jciwgYWx0ZXJfaWQpKSAlPiUgDQogIHNlbGVjdCgxLDcsNCwyLDMsNSw2KSAlPiUgDQogIGFycmFuZ2UoZHlhZF9pZCwgc3VydmV5X3dhdmUpDQpgYGANCg0KIyBBbHRlciBDaGVja3MNCiMjIEdlbmRlciBjaGVjaw0KQ2hlY2sgd2V0aGVyIGFuIGFsdGVyIHN3aXRjaGVkIGdlbmRlci4gQWx0aG91Z2ggaXQgaXMgcG9zc2libGUsIGl0IGlzIHN0aWxsIGhpZ2hseSB1bmxpa2VseS4gDQoNCmBgYHtyIGdlbmRlciBjaGVja2p9DQojY3JlYXRlIG1lYW4gb2YgZ2VuZGVyIG92ZXIgdGltZS4gSWYgbm90IDAgb3IgMSwgdGhlbiB3ZSBoYXZlIGEgcHJvYmxlbS4NCmNoZWNrX2RhdGFfZ2VuZGVyIDwtIGNoZWNrX2RhdGEgJT4lDQogIGFycmFuZ2Uobm9tZW1fZW5jciwgZHlhZF9pZCwgc3VydmV5X3dhdmUpICU+JQ0KICBncm91cF9ieShkeWFkX2lkKSAlPiUNCiAgbXV0YXRlKG1lYW5fZ2VuZGVyID0gbWVhbihnZW5kZXJfYWx0ZXIpKSAlPiUNCiAgdW5ncm91cCgpDQoNCiN3ZSBsb3NlIDQ2MSByZXNwb25kZW50cyBkdWUgdG8gZ2VuZGVyIHRyYW5zaXRpb25zDQpnZW5kZXJfZmF1bHRfaWRzIDwtIGNoZWNrX2RhdGFfZ2VuZGVyICU+JSANCiAgZmlsdGVyKG1lYW5fZ2VuZGVyICE9IDIgJiBtZWFuX2dlbmRlciAhPSAxKSAlPiUNCiAgc2VsZWN0KG5vbWVtX2VuY3IpICU+JQ0KICBkaXN0aW5jdCgpDQpgYGANCg0KIyMgUmVsYXRpb25zaGlwIGNoZWNrDQpDaGVjayB3aGV0ZXIgYWx0ZXJzIGhhdmUgd2VpcmQgYW5kIGltcHBvc2libGUgcmVsYXRpb25zaGlwIHRyYW5zZm9ybWF0aW9ucw0KDQpgYGB7ciByZWwgY2hlY2t9DQojVXNlIHBhc3RlMCB0byBjcmVhdGUgYSBuZXcgdmFyaWFibGUgd2hpY2ggY29udGFpbiB1bmlxdWUgdHJhbnNpdGlvbiBjb21iaW5hdGlvbnMNCiN0aGVuIHdlIGNhbiBhY3R1YWxseSBmaWx0ZXIgb3V0IHRoZSBpbXBvc3NpYmxlIGNvbWJpbmF0aW9ucy4NCmNoZWNrX2RhdGEgPC0gY2hlY2tfZGF0YSAlPiUNCiAgZ3JvdXBfYnko
ZHlhZF9pZCkgJT4lDQogIHNlbGVjdChub21lbV9lbmNyLCBkeWFkX2lkLCBzdXJ2ZXlfd2F2ZSwgcmVsX2FsdGVyKSAlPiUNCiAgZmlsdGVyKCFpcy5uYShyZWxfYWx0ZXIpKSAlPiUgDQogIG11dGF0ZShyZWxfY2hlY2sgPSBwYXN0ZTAocmVsX2FsdGVyLCBsYWcocmVsX2FsdGVyKSkpICU+JSAjY3JlYXRlIHVuaXF1ZSB2YXJpYWJsZQ0KICB1bmdyb3VwKCkNCg0KI3NldCBpbXBvc3NpYmxlIGNvbWJpbmF0aW9ucy4gU2VlIGNvZGVib29rIGZvciB0aGUgbWVhbmluZyBvZiB0aGVzZS4gDQppbXBvc3NpYmxlX2NvbWJpbmF0aW9ucyA8LSBjKDEyLDEzLDE0LDE1LDE2LDE3LDE4LDExMCwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgMjEsMjMsMjQsMjUsMjYsMjcsMjgsMjksMjEwLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAzMSwzMiwzNCwzNSwzNiwzNywzOCwzOSwzMTAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIDQxLDQyLDQzLDQ1LDQ2LDQ3LDQ4LDQ5LDQxMCwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgNTEsNTIsNTMsNTQsNTYsNTcsNTgsNTksNTEwLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICA2Miw2Myw2NCw2NSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgNzIsNzMsNzQsNzUsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIDgyLDgzLDg0LDg1LA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICA5Miw5Myw5NCw5NSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgMTAyLDEwMywxMDQsMTA1KQ0KDQojd2UgbG9zZSAxMDAwIGVnbyBvdmVyIGltcG9zc2libGUgY29tYmluYXRpb25zDQojc2VsZWN0IHRoZSByb3dzIHdpdGggaW1wb3NzaWJsZSBjb21iaW5hdGlvbnMNCmZhdWx0eV9keWFkc19pZHMgPC0gY2hlY2tfZGF0YSAlPiUNCiAgZmlsdGVyKHJlbF9jaGVjayAlaW4lIGltcG9zc2libGVfY29tYmluYXRpb25zKSAlPiUNCiAgc2VsZWN0KG5vbWVtX2VuY3IpICU+JSANCiAgZGlzdGluY3QoKSAlPiUgDQogIHB1bGwoKQ0KDQoNCmBgYA0KDQojIENsZWFuIGRhdGENCkZpbHRlciBuZXR3b3JrcyB3aXRoaW1wb3NzaWJsZSBnZW5kZXIgb3IgcmVsYXRpb25zaGlwIHRyYW5zZm9ybWF0aW9ucy4gDQoNCmBgYHtyIGZpbHRlciBvdXQgZGF0YSB9DQojZmlsdGVyIG91dCB0aGUgbmV0d29ya3Mgd2l0aCBpbXBvc3NpYmxlIGNvbWJpbmF0aW9ucw0KbGlzc19sb25nIDwtIGxpc3NfbG9uZyAlPiUNCiAgZmlsdGVyKCFub21lbV9lbmNyICVpbiUgZmF1bHR5X2R5YWRzX2lkcykgJT4lICAjcmVtb3ZlIGR5YWRzX2lkcw0KICBmaWx0ZXIoIW5vbWVtX2VuY3IgJWluJSBnZW5kZXJfZmF1bHRfaWRzKSAgI3JlbW92ZSBnZW5kZXJfaWRzDQoNCiNmaWx0ZXIgb3V0IGZyb20gd2lkZSBmaWxlDQpsaXNzX3dpZGUgPC0gbGlzc193aWRlICU+JQ0KICBmaWx0ZXIoIW5vbWVtX2VuY3IgJWluJSBmYXVsdHlfZHlhZHNfaWRzKSAlPiUgICNyZW1vdmUgZHlhZHNfaWRzDQog
IGZpbHRlcighbm9tZW1fZW5jciAlaW4lIGdlbmRlcl9mYXVsdF9pZHMpICAjcmVtb3ZlIGdlbmRlcl9pZHMNCg0KDQpgYGANCg0KIyBEYXRhIGV4cG9ydA0KDQpgYGB7ciBkYXRhIGV4cG9ydH0NCiNjbGVhbiBnbG9iYWwgZW52aXJvbm1lbnQuDQojb25seSBrZWVwIHRoZSB3aWRlIGFuZCBsb25nIGZpbGVzLiANCnJtKGxpc3Q9bHMoKVshIGxzKCkgJWluJSBjKCJsaXNzX3dpZGUiLCJsaXNzX2xvbmciKV0pDQoNCiNzYXZlIGRhdGENCnNhdmUuaW1hZ2UoZmlsZSA9ICJkYXRhL2RhdGEtcHJvY2Vzc2VkL2xpc3NfbWVyZ2VkL2xpc3NjZG5fY2xlYW5lZF9WNF8yNDA2MjQuUmRhdGEiKQ0KYGBgDQoNCg0KDQo=


Copyright © 2024 Jeroense Thijmen