Tags: r, dplyr, shiny, lapply, shinywidgets

How to obtain most nested list values by name in R


I have the following dataset.

# Sample data: two grouping columns, 20 rows. group1 is ten "a" then ten "b";
# group2 alternates a/b within the first half and c/d within the second.
data <- data.frame(
  group1 = rep(c("a", "b"), each = 10),
  group2 = c(rep(c("a", "b"), times = 5), rep(c("c", "d"), times = 5))
)
data
   group1 group2
1       a      a
2       a      b
3       a      a
4       a      b
5       a      a
6       a      b
7       a      a
8       a      b
9       a      a
10      a      b
11      b      c
12      b      d
13      b      c
14      b      d
15      b      c
16      b      d
17      b      c
18      b      d
19      b      c
20      b      d

I then use the create_tree function from the shinyWidgets package, which gives me a list with the following structure.

# Build the nested list from the data frame (create_tree is from shinyWidgets).
tree <- create_tree(data)
# Print a compact summary of the nested list structure.
str(tree)
List of 2
 $ :List of 3
  ..$ text    : chr "a"
  ..$ id      : chr "tree7337428"
  ..$ children:List of 2
  .. ..$ :List of 2
  .. .. ..$ text: chr "a"
  .. .. ..$ id  : chr "tree7904513"
  .. ..$ :List of 2
  .. .. ..$ text: chr "b"
  .. .. ..$ id  : chr "tree7346861"
 $ :List of 3
  ..$ text    : chr "b"
  ..$ id      : chr "tree6379478"
  ..$ children:List of 2
  .. ..$ :List of 2
  .. .. ..$ text: chr "c"
  .. .. ..$ id  : chr "tree500704"
  .. ..$ :List of 2
  .. .. ..$ text: chr "d"
  .. .. ..$ id  : chr "tree8058601"

From this list, I would like to obtain the following dataset.

  group1 group2     tree_id
1      a      a tree7904513
2      a      b tree7346861
3      b      c  tree500704
4      b      d tree8058601

So basically I want to extract the terminal ids (those of the leaf nodes) from the list. Any help would be appreciated.


Solution

  • How about this:

    # For each top-level node, stack its children into a data frame and prepend
    # the node's first element (its text) as `group1`; then stack all nodes.
    do.call(
      rbind,
      lapply(tree, function(node) {
        children_df <- do.call(rbind.data.frame, node$children)
        cbind(group1 = node[[1]], children_df)
      })
    )
    #   group1 text          id
    # 1      a    a tree5278958
    # 2      a    b  tree864879
    # 3      b    c tree7348978
    # 4      b    d tree2004898
    

    The above assumes that the tree is of known depth. If not, perhaps this recursive function will work with a variably-nested tree list:

    #' Flatten a nested tree list into a data frame with one row per leaf
    #'
    #' @param tree Either an unnamed list of nodes, or a single node: a named
    #'   list with `text` and `id`, plus `children` (a list of nodes) when the
    #'   node is not a leaf.
    #' @param n Depth assigned to `tree`; a node's `text` is stored in the
    #'   column `group<n>`.
    #' @return A data frame with one `group<k>` column per level and the leaf
    #'   `id`. Only leaf ids are kept; ids of nodes with children are dropped.
    #'   Assumes every leaf sits at the same depth so that the per-leaf rows
    #'   share the same columns.
    fun <- function(tree, n = 1) {
      if (is.list(tree) && is.null(names(tree))) {
        # Unnamed list: sibling nodes at the same depth. Forward `n` so that a
        # caller-supplied starting depth is honoured rather than reset to 1.
        do.call(rbind.data.frame, lapply(tree, fun, n = n))
      } else if ("children" %in% names(tree)) {
        out <- setNames(data.frame(a = tree[["text"]]), paste0("group", n))
        leaves <- do.call(
          rbind.data.frame,
          lapply(tree[["children"]], fun, n = n + 1)
        )
        # The single parent row is recycled across all descendant leaf rows.
        cbind(out, leaves)
      } else {
        # Leaf node: keep its label (at this depth) and its id.
        setNames(as.data.frame(tree[c("text", "id")]),
                 c(paste0("group", n), "id"))
      }
    }
    # Run on the sample tree; the printed result is shown in the comments below.
    fun(tree)
    #   group1 group2          id
    # 1      a      a tree5278958
    # 2      a      b  tree864879
    # 3      b      c tree7348978
    # 4      b      d tree2004898
    

    Caveat emptor, I haven't tested this second function with anything other than this sample data. I'm sure it'll fail with differently-structured data.


    Data

    # Reproducible inputs: the 20-row data frame from the question and a fixed
    # copy of the tree (create_tree generates fresh ids on each call).
    data <- structure(
      list(
        group1 = rep(c("a", "b"), each = 10L),
        group2 = c(rep(c("a", "b"), times = 5L), rep(c("c", "d"), times = 5L))
      ),
      class = "data.frame",
      row.names = c(NA, -20L)
    )
    # tree <- shinyWidgets::create_tree(data)
    tree <- list(
      list(
        text = "a", id = "tree1574771",
        children = list(
          list(text = "a", id = "tree5278958"),
          list(text = "b", id = "tree864879")
        )
      ),
      list(
        text = "b", id = "tree339448",
        children = list(
          list(text = "c", id = "tree7348978"),
          list(text = "d", id = "tree2004898")
        )
      )
    )