I have not found an example that applies to my specific problem, but feel free to label as duplicate if needed.
I have a nested list like the following:
# Sample input: one entry per sequence ID. Each entry carries the full
# sequence in SEQ and, optionally, a FEAT list of named features.
nested_list <- list(
  ID1 = list(
    # Feature coordinates are stored as character strings.
    FEAT = list(
      feat1 = list(start = "1", end = "15", label = "CDR1"),
      feat2 = list(start = "20", end = "25", label = "CDR2")
    ),
    SEQ = "ACTGATCGTAGCTAGCTAGATGCTGATGTGTC"
  ),
  # ID2 has a sequence but no FEAT element at all.
  ID2 = list(
    SEQ = "ACTGATCGGCGGTGGCTAGCTGTGGGGCGCGCGACCGGGAAAA"
  )
)
I want to obtain a data frame like this:
id feat feat_label feat_start feat_end full_seq
1 ID1 feat1 CDR1 1 15 ACTGATCGTAGCTAGCTAGATGCTGATGTGTC
2 ID1 feat2 CDR2 20 25 ACTGATCGTAGCTAGCTAGATGCTGATGTGTC
3 ID2 <NA> <NA> NA NA ACTGATCGGCGGTGGCTAGCTGTGGGGCGCGCGACCGGGAAAA
`as.data.frame(nested_list)` does not produce what I want, so it probably needs an extra step to turn the data frame into a "longer" format, ideally with something like `tidyr::pivot_longer()`.
I think this gets you the essentials of what you want. The column order and column names are easily adjusted.
# Sample input: one entry per sequence ID. Each entry carries the full
# sequence in SEQ and, optionally, a FEAT list of named features.
nested_list <- list(
  ID1 = list(
    # Feature coordinates are stored as character strings.
    FEAT = list(
      feat1 = list(start = "1", end = "15", label = "CDR1"),
      feat2 = list(start = "20", end = "25", label = "CDR2")
    ),
    SEQ = "ACTGATCGTAGCTAGCTAGATGCTGATGTGTC"
  ),
  # ID2 has a sequence but no FEAT element at all.
  ID2 = list(
    SEQ = "ACTGATCGGCGGTGGCTAGCTGTGGGGCGCGCGACCGGGAAAA"
  )
)
library(purrr)
# Stack a list of feature records into one table, one row per record.
#
# L: list whose elements are themselves lists of fields
#    (start/end/label in the sample data).
Unpack2 <- function(L) {
  # unlist() flattens each record into a named vector; map_dfr() then
  # row-binds those vectors, matching columns by name.
  map_dfr(.x = L, .f = unlist)
}
# Build the output rows for one top-level entry of a nested list.
#
# NAME: name of the entry to unpack (e.g. "ID1").
# DATA: named list holding the entries. Defaults to the global `nested_list`,
#       so existing calls such as map_dfr(ListNames, Unpack1) keep working.
#
# Returns one row per element of the entry's FEAT list (columns come from the
# feature fields, plus feat, ID and Seq). When FEAT is missing or empty, a
# single row with NA start/end/label/feat is returned so that every entry
# yields the same columns in the same order.
Unpack1 <- function(NAME, DATA = nested_list) {
  # `[[` matches names exactly; `$` on a plain list partially matches and
  # would, for example, pick up an element called "FEATURES" for `$FEAT`.
  entry <- DATA[[NAME]]
  feats <- entry[["FEAT"]]
  full_seq <- entry[["SEQ"]]

  if (length(feats) > 0) {
    DF <- Unpack2(feats)
    feat_names <- names(feats)
    # Unnamed features still get a `feat` column so the schema stays fixed.
    if (is.null(feat_names)) {
      feat_names <- rep(NA_character_, length(feats))
    }
    DF$feat <- feat_names
  } else {
    # Missing or empty FEAT: emit the same columns as the feature branch,
    # typed as character to match the string-valued feature fields.
    DF <- data.frame(
      start = NA_character_,
      end = NA_character_,
      label = NA_character_,
      feat = NA_character_
    )
  }

  DF$ID <- NAME
  # Assigning NULL would silently omit the column, so fall back to NA.
  DF$Seq <- if (is.null(full_seq)) NA_character_ else full_seq
  DF
}
# Unpack every top-level entry by name and stack the per-entry rows
# into a single table.
ListNames <- names(nested_list)
ListNames |>
  map_dfr(Unpack1)
#> # A tibble: 3 × 6
#> start end label feat ID Seq
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1 15 CDR1 feat1 ID1 ACTGATCGTAGCTAGCTAGATGCTGATGTGTC
#> 2 20 25 CDR2 feat2 ID1 ACTGATCGTAGCTAGCTAGATGCTGATGTGTC
#> 3 <NA> <NA> <NA> <NA> ID2 ACTGATCGGCGGTGGCTAGCTGTGGGGCGCGCGACCGGGAAAA
Created on 2024-11-25 with reprex v2.1.1