rtidyversecrosstabpurrrjanitor

tidyverse: Cross tables of one variable with all other variables in data.frame


I want to make cross table of a variable with all other variables in the data.frame.

library(tidyverse)
library(janitor)

humans <- starwars %>%
  filter(species == "Human")

humans %>%
  janitor::tabyl(gender, eye_color)



gender blue blue-gray brown dark hazel yellow
 female    3         0     5    0     1      0
   male    9         1    12    1     1      2

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  purrr::map(.f = ~janitor::tabyl(dat = humans, gender, .x))

Error: Unknown columns `blond`, `none`, `brown`, `brown, grey`, `brown` and ... 
Call `rlang::last_error()` to see a backtrace

Solution

  • tably takes names as arguments and you passed a vector to it.

    If you use imap you'll have access to the name of the column, that you can convert to a symbol, and as janitor supports quasi-quotation you can write:

    humans %>%
      select_if(is.character) %>%
      select(-name, -gender) %>%
      imap(.f = ~janitor::tabyl(dat = humans, !!sym(.y), gender))
    #$`hair_color`
    #     hair_color female male
    #         auburn      1    0
    #   auburn, grey      0    1
    #  auburn, white      0    1
    #          black      1    7
    #          blond      0    3
    #          brown      6    8
    #    brown, grey      0    1
    #           grey      0    1
    #           none      0    3
    #          white      1    1
    # 
    # $skin_color
    #  skin_color female male
    #        dark      0    4
    #        fair      3   13
    

    Interestingly tabyl.data.frame calls an unexported function that works on symbols so by calling it directly we can skip the unquoting and use base R.

    cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
    lapply(cols, function(x) janitor:::tabyl_2way(humans, as.name(x), quote(gender)))
    # [[1]]
    #     hair_color female male
    #         auburn      1    0
    #   auburn, grey      0    1
    #  auburn, white      0    1
    #          black      1    7
    #          blond      0    3
    #          brown      6    8
    #    brown, grey      0    1
    #           grey      0    1
    #           none      0    3
    #          white      1    1
    # 
    # [[2]]
    #  skin_color female male
    #        dark      0    4
    

    To make it work with xtable @akrun's suggestion works here as well :

    humans %>%
      select_if(is.character) %>%
      select(-name, -gender) %>%
      imap(.f = ~tabyl(dat = humans, !!sym(.y), gender) %>% rename_at(1,~"x")) %>%
      xtableList
    

    or

    cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
    l <- lapply(cols, function(x) {
      res <- janitor:::tabyl_2way(humans, as.name(x), quote(gender))
      names(res)[1] <- "x"
      res
    })
    xtableList(l)