Please consider the following data frame:
df <- structure(list(oID = c(37751L, 30978L, 33498L),
peId = c(12L, 13L, 14L),
last_Name = c("ABC", "DEF", "EFG"),
first_Name = c("Z", "Y", "X"),
personnel_Number = list(structure(list(hId = c(1L, 4L, 5L),
hName = c("PS", "XY", "MN"),
personnel_Number = c("0123", "1234", "98")),
class = "data.frame",
row.names = c(NA, 3L)),
structure(list(hId = 1L, hName = "PS",
personnel_Number = "0987"),
class = "data.frame",
row.names = 1L),
structure(list(),
names = character(0),
row.names = integer(0),
class = "data.frame")),
ls_Role = list(structure(list(functionId = c(1L, 5L),
`function` = c("function A", "function B"),
function_Short = c("FA", "FB")),
class = "data.frame",
row.names = 1:2),
structure(list(functionId = 6L,
`function` = "function A",
function_Short = "FA"),
class = "data.frame",
row.names = 1L),
structure(list(functionId = 6L,
`function` = "function A",
function_Short = "FA"),
class = "data.frame",
row.names = 1L))),
row.names = c(1L, 2L, 3L),
class = "data.frame")
As you can see, the data frame contains two list columns that each contain row-based data frames. I want to unnest each of these list columns in a way that their "inner" data frames will be put into wide format, i.e. all of the columns are repeated for as many rows as the inner data frames contain per row.
So for the "personnel_number" column I expect to return 9 columns (hId_1, _2, _3; hName_1, _2 and so on). And the same for the ls_Role column.
I know how I can do it manually by unnesting and then heavily reshaping the whole data frame, but I'm wondering if there is a more concise way that does it more automatically or with less function calls, preferably in a tidyverse manner.
Here's my current code (which also reflects my expected output):
library(tidyverse)
df |>
mutate(id = row_number()) |>
unnest_longer(col = personnel_Number, keep_empty = TRUE) |>
unpack(cols = personnel_Number) |>
mutate(id_inner = row_number(), .by = id) |>
pivot_wider(values_from = c(hId, hName, personnel_Number),
names_from = id_inner) |>
unnest_longer(col = ls_Role, keep_empty = TRUE) |>
unpack(cols = ls_Role) |>
mutate(id_inner = row_number(), .by = id) |>
pivot_wider(values_from = c(functionId, "function", function_Short),
names_from = id_inner)
For your example data, you can get to the desired result by unnesting wider all lists twice, e.g.
library(tidyr)
library(dplyr)
res <- df |>
mutate(id = row_number(), .after = first_Name) |>
unnest_wider(where(is.list)) |>
unnest_wider(where(is.list), names_sep = "_")
Where res
is:
# A tibble: 3 × 20
oID peId last_Name first_Name id hId_1 hId_2 hId_3 hName_1 hName_2 hName_3
<int> <int> <chr> <chr> <int> <int> <int> <int> <chr> <chr> <chr>
1 37751 12 ABC Z 1 1 4 5 PS XY MN
2 30978 13 DEF Y 2 1 NA NA PS NA NA
3 33498 14 EFG X 3 NA NA NA NA NA NA
# ℹ 9 more variables: personnel_Number_1 <chr>, personnel_Number_2 <chr>,
# personnel_Number_3 <chr>, functionId_1 <int>, functionId_2 <int>,
# function_1 <chr>, function_2 <chr>, function_Short_1 <chr>,
# function_Short_2 <chr>
Check it matches desired result:
identical(wanted, res)
[1] TRUE
wanted
data:
wanted <- df |>
mutate(id = row_number()) |>
unnest_longer(col = personnel_Number, keep_empty = TRUE) |>
unpack(cols = personnel_Number) |>
mutate(id_inner = row_number(), .by = id) |>
pivot_wider(values_from = c(hId, hName, personnel_Number),
names_from = id_inner) |>
unnest_longer(col = ls_Role, keep_empty = TRUE) |>
unpack(cols = ls_Role) |>
mutate(id_inner = row_number(), .by = id) |>
pivot_wider(values_from = c(functionId, "function", function_Short),
names_from = id_inner)