How do we create a siblings composition column from gender, family id, and individual id columns in R?
I applied the suggested code to the original dataset, but it did not produce the desired result in the siblings_composition column, which should be 1 for at least one male sibling, 2 for at least one female sibling, 3 for both male and female siblings, and 0 for no siblings. In the original dataset, BIRIMNO is the family_id, CINSIYET is the gender, and id is the individual_id. As an illustration, here is the result, as shown by the code below:
head(data)
# A tibble: 6 × 4
# Groups: BIRIMNO [5]
BIRIMNO CINSIYET id siblings_composition
<dbl> <fct> <dbl> <int>
1 144003 F 14400307 3
2 144003 M 14400306 3
3 144009 F 14400903 3
4 144014 M 14401409 3
5 144015 M 14401501 2
6 144016 M 14401603 3
For reproducibility, here is the result of running the following on the original dataset:
dput(head(data, 100))
structure(list(BIRIMNO = c(144003, 144003, 144009, 144014, 144015,
144016, 144020, 144020, 144021, 144025, 144025, 144025, 144028,
144028, 144029, 144031, 144034, 144036, 144039, 144040, 144042,
144042, 144046, 144047, 144047, 144049, 144054, 144056, 144056,
144060, 144061, 144069, 144071, 144071, 144071, 144071, 144073,
144074, 144074, 144077, 144079, 144080, 144084, 144084, 144084,
144088, 144088, 144090, 144092, 144092, 144092, 144094, 144113,
144118, 144120, 144122, 144123, 144123, 144123, 144124, 144127,
144127, 144129, 144129, 144130, 144134, 144137, 144138, 144151,
144152, 144154, 144158, 144162, 144162, 144162, 144162, 144163,
144163, 144163, 144167, 144172, 144172, 144176, 144176, 144181,
144181, 144183, 144185, 144189, 144202, 144202, 144214, 144215,
144217, 144219, 144224, 144224, 144247, 144247, 144249), CINSIYET = structure(c(2L,
1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 2L), levels = c("M", "F"), class = "factor"), id = c(14400307,
14400306, 14400903, 14401409, 14401501, 14401603, 14402003, 14402004,
14402103, 14402503, 14402505, 14402506, 14402803, 14402804, 14402904,
14403104, 14403404, 14403603, 14403903, 14404003, 14404205, 14404204,
14404603, 14404703, 14404704, 14404905, 14405403, 14405603, 14405604,
14406004, 14406103, 14406903, 14407109, 14407112, 14407111, 14407110,
14407303, 14407403, 14407404, 14407706, 14407908, 14408006, 14408405,
14408404, 14408403, 14408803, 14408804, 14409004, 14409204, 14409205,
14409203, 14409405, 14411303, 14411804, 14412003, 14412203, 14412304,
14412306, 14412305, 14412407, 14412704, 14412705, 14412906, 14412905,
14413003, 14413403, 14413703, 14413804, 14415103, 14415203, 14415404,
14415803, 14416207, 14416204, 14416206, 14416205, 14416306, 14416307,
14416308, 14416704, 14417204, 14417205, 14417603, 14417604, 14418104,
14418103, 14418303, 14418503, 14418903, 14420204, 14420203, 14421403,
14421503, 14421704, 14421903, 14422403, 14422404, 14424704, 14424703,
14424903), siblings_composition = c(3L, 3L, 3L, 3L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
BIRIMNO = c(144003, 144009, 144014, 144015, 144016, 144020,
144021, 144025, 144028, 144029, 144031, 144034, 144036, 144039,
144040, 144042, 144046, 144047, 144049, 144054, 144056, 144060,
144061, 144069, 144071, 144073, 144074, 144077, 144079, 144080,
144084, 144088, 144090, 144092, 144094, 144113, 144118, 144120,
144122, 144123, 144124, 144127, 144129, 144130, 144134, 144137,
144138, 144151, 144152, 144154, 144158, 144162, 144163, 144167,
144172, 144176, 144181, 144183, 144185, 144189, 144202, 144214,
144215, 144217, 144219, 144224, 144247, 144249), .rows = structure(list(
1:2, 3L, 4L, 5L, 6L, 7:8, 9L, 10:12, 13:14, 15L, 16L,
17L, 18L, 19L, 20L, 21:22, 23L, 24:25, 26L, 27L, 28:29,
30L, 31L, 32L, 33:36, 37L, 38:39, 40L, 41L, 42L, 43:45,
46:47, 48L, 49:51, 52L, 53L, 54L, 55L, 56L, 57:59, 60L,
61:62, 63:64, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73:76, 77:79, 80L, 81:82, 83:84, 85:86, 87L, 88L, 89L,
90:91, 92L, 93L, 94L, 95L, 96:97, 98:99, 100L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -68L), .drop = TRUE))
Many thanks
Comments
Post a Comment