2023-11-06

How do we create a siblings composition column from the gender, family id, and individual id columns in R?

I applied the suggested code to the original dataset, but it did not produce the desired result in the siblings_composition column. The column should be coded as 1 for at least one male sibling, 2 for at least one female sibling, 3 for both male and female siblings, and 0 for no siblings. In the original dataset, BIRIMNO is the family_id, CINSIYET is the gender, and id is the individual_id. As an illustration, here is the result I obtained:

head(data)

# A tibble: 6 × 4
# Groups:   BIRIMNO [5]
  BIRIMNO CINSIYET       id siblings_composition
    <dbl> <fct>       <dbl>                <int>
1  144003 F        14400307                    3
2  144003 M        14400306                    3
3  144009 F        14400903                    3
4  144014 M        14401409                    3
5  144015 M        14401501                    2
6  144016 M        14401603                    3

For reproducibility, here is the output of the following call on the original dataset:

dput(head(data, 100))

structure(list(BIRIMNO = c(144003, 144003, 144009, 144014, 144015, 
144016, 144020, 144020, 144021, 144025, 144025, 144025, 144028, 
144028, 144029, 144031, 144034, 144036, 144039, 144040, 144042, 
144042, 144046, 144047, 144047, 144049, 144054, 144056, 144056, 
144060, 144061, 144069, 144071, 144071, 144071, 144071, 144073, 
144074, 144074, 144077, 144079, 144080, 144084, 144084, 144084, 
144088, 144088, 144090, 144092, 144092, 144092, 144094, 144113, 
144118, 144120, 144122, 144123, 144123, 144123, 144124, 144127, 
144127, 144129, 144129, 144130, 144134, 144137, 144138, 144151, 
144152, 144154, 144158, 144162, 144162, 144162, 144162, 144163, 
144163, 144163, 144167, 144172, 144172, 144176, 144176, 144181, 
144181, 144183, 144185, 144189, 144202, 144202, 144214, 144215, 
144217, 144219, 144224, 144224, 144247, 144247, 144249), CINSIYET = structure(c(2L, 
1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 
1L, 1L, 2L), levels = c("M", "F"), class = "factor"), id = c(14400307, 
14400306, 14400903, 14401409, 14401501, 14401603, 14402003, 14402004, 
14402103, 14402503, 14402505, 14402506, 14402803, 14402804, 14402904, 
14403104, 14403404, 14403603, 14403903, 14404003, 14404205, 14404204, 
14404603, 14404703, 14404704, 14404905, 14405403, 14405603, 14405604, 
14406004, 14406103, 14406903, 14407109, 14407112, 14407111, 14407110, 
14407303, 14407403, 14407404, 14407706, 14407908, 14408006, 14408405, 
14408404, 14408403, 14408803, 14408804, 14409004, 14409204, 14409205, 
14409203, 14409405, 14411303, 14411804, 14412003, 14412203, 14412304, 
14412306, 14412305, 14412407, 14412704, 14412705, 14412906, 14412905, 
14413003, 14413403, 14413703, 14413804, 14415103, 14415203, 14415404, 
14415803, 14416207, 14416204, 14416206, 14416205, 14416306, 14416307, 
14416308, 14416704, 14417204, 14417205, 14417603, 14417604, 14418104, 
14418103, 14418303, 14418503, 14418903, 14420204, 14420203, 14421403, 
14421503, 14421704, 14421903, 14422403, 14422404, 14424704, 14424703, 
14424903), siblings_composition = c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    BIRIMNO = c(144003, 144009, 144014, 144015, 144016, 144020, 
    144021, 144025, 144028, 144029, 144031, 144034, 144036, 144039, 
    144040, 144042, 144046, 144047, 144049, 144054, 144056, 144060, 
    144061, 144069, 144071, 144073, 144074, 144077, 144079, 144080, 
    144084, 144088, 144090, 144092, 144094, 144113, 144118, 144120, 
    144122, 144123, 144124, 144127, 144129, 144130, 144134, 144137, 
    144138, 144151, 144152, 144154, 144158, 144162, 144163, 144167, 
    144172, 144176, 144181, 144183, 144185, 144189, 144202, 144214, 
    144215, 144217, 144219, 144224, 144247, 144249), .rows = structure(list(
        1:2, 3L, 4L, 5L, 6L, 7:8, 9L, 10:12, 13:14, 15L, 16L, 
        17L, 18L, 19L, 20L, 21:22, 23L, 24:25, 26L, 27L, 28:29, 
        30L, 31L, 32L, 33:36, 37L, 38:39, 40L, 41L, 42L, 43:45, 
        46:47, 48L, 49:51, 52L, 53L, 54L, 55L, 56L, 57:59, 60L, 
        61:62, 63:64, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 
        73:76, 77:79, 80L, 81:82, 83:84, 85:86, 87L, 88L, 89L, 
        90:91, 92L, 93L, 94L, 95L, 96:97, 98:99, 100L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -68L), .drop = TRUE))

Many thanks



No comments:

Post a Comment