I am trying to reshape my data from long to wide format. I would like to keep all rows that match for sn3=1 and sn3=2 instead of taking only the first value. How can I achieve this?
Desired output:
sn2 | leisure.1 | hmonth.1 | hyear.1 | leisure.2 | hmonth.2 | hyear.2 |
---|---|---|---|---|---|---|
227 | 230 | 6 | 2000 | 540 | 6 | 2000 |
227 | 130 | 6 | 2000 | 170 | 6 | 2000 |
250 | 370 | 6 | 2000 | 380 | 6 | 2000 |
250 | 380 | 6 | 2000 | 190 | 6 | 2000 |
294 | 120 | 6 | 2000 | 210 | 6 | 2000 |
294 | 200 | 6 | 2000 | 310 | 6 | 2000 |
307 | 130 | 7 | 2000 | 220 | 7 | 2000 |
307 | 480 | 7 | 2000 | 270 | 7 | 2000 |
> dput(df)
structure(list(sn2 = structure(c(227, 227, 227, 227, 249, 249,
250, 250, 250, 250, 294, 294, 294, 294, 307, 307, 307, 307), label = "household number", format.stata = "%8.0g"),
sn3 = structure(c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2,
2, 1, 1, 2, 2), label = "person number", format.stata = "%8.0g"),
leisure = c(230, 130, 540, 170, 430, 480, 370, 380, 380,
190, 120, 200, 210, 310, 130, 480, 220, 270), hmonth = structure(c(6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6, july = 7,
august = 8, september = 9, october = 10, november = 11, december = 12
), class = c("haven_labelled", "vctrs_vctr", "double")),
hyear = structure(c(2000, 2000, 2000, 2000, 2000, 2000, 2000,
2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
2000), label = "year of household interview", format.stata = "%8.0g")), row.names = c(NA,
-18L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`3492` = 3492L,
`3493` = 3493L, `3494` = 3494L, `3495` = 3495L, `3496` = 3496L,
`3497` = 3497L, `3498` = 3498L, `3499` = 3499L, `3500` = 3500L,
`3501` = 3501L, `3508` = 3508L, `3509` = 3509L, `3510` = 3510L,
`3511` = 3511L, `3512` = 3512L, `3513` = 3513L, `3518` = 3518L,
`3519` = 3519L, `3520` = 3520L, `3521` = 3521L, `3522` = 3522L,
`3523` = 3523L, `3524` = 3524L, `3525` = 3525L), class = "omit"))
As long as you get rid of the rows for which each sn2 only has a single value of sn3, you can just pivot_wider
then unnest_longer
:
library(tidyverse)
df %>%
filter(n() > 2L, .by = sn2) %>%
pivot_wider(names_from = sn3, values_from = leisure:hyear,
values_fn = list) %>%
unnest_longer(leisure_1:hyear_2)
#> # A tibble: 8 x 7
#> sn2 leisure_1 leisure_2 hmonth_1 hmonth_2 hyear_1 hyear_2
#> <dbl> <dbl> <dbl> <dbl+lbl> <dbl+lbl> <dbl> <dbl>
#> 1 227 230 540 6 [june] 6 [june] 2000 2000
#> 2 227 130 170 6 [june] 6 [june] 2000 2000
#> 3 250 370 380 6 [june] 6 [june] 2000 2000
#> 4 250 380 190 6 [june] 6 [june] 2000 2000
#> 5 294 120 210 6 [june] 6 [june] 2000 2000
#> 6 294 200 310 6 [june] 6 [june] 2000 2000
#> 7 307 130 220 7 [july] 7 [july] 2000 2000
#> 8 307 480 270 7 [july] 7 [july] 2000 2000