rgtsummary

Using tbl_summary with tbl_strata to generate multi-level Header columns


I am trying to create table like below using gtsummary.

enter image description here

I have sample code below that provides most of it.

# Load data
advs <- pharmaverseadam::advs %>%
  filter(SAFFL == "Y" & VSTESTCD %in% c('SYSBP', "DIABP") & !is.na(AVISIT)) %>%
  select(c(USUBJID, TRT01A, PARAMCD, PARAM, AVISIT, AVISITN, ADT, AVAL, CHG, PCHG, VSPOS, VSTPT))

# Summary mean prior to process
advs.smr <- advs %>%
  group_by(USUBJID, TRT01A, PARAMCD, PARAM, AVISIT, AVISITN, ADT) %>%
  summarise(AVAL.MEAN = mean(AVAL, na.rm = TRUE),
            CHG.MEAN = mean(CHG, na.rm = TRUE),
            .groups = 'drop') %>%
  mutate(visit_id = paste("Vis", sprintf("%03d", AVISITN), AVISIT, sep = "_")) %>%
  arrange(USUBJID, PARAMCD, AVISITN) %>%
  filter(AVISITN <= 4)

# Wide to Long
advs.smr.l <- advs.smr %>%
  pivot_longer(cols = c(AVAL.MEAN, CHG.MEAN),
               names_to = "anls_var",
               values_to = "Value") %>%
  filter(!is.nan(Value)) %>%
  mutate(anls_var = if_else(grepl("AVAL", anls_var), "Actual Value", "Change From Baseline"))

# Long to Wide
vs.parm <- advs.smr.l %>%
  select(-c(AVISITN, AVISIT, ADT)) %>%
  pivot_wider(names_from = visit_id,
              values_from = Value) %>%
  filter(PARAMCD == "SYSBP")

# Upcase column names
colnames(vs.parm) <- toupper(colnames(vs.parm))

# Create List of visit names
alvis <- unique(colnames(vs.parm)[grep("^VIS", colnames(vs.parm), ignore.case = TRUE)])
vis.nam <- setNames(as.list(sub(".*_", "", alvis)), alvis)

# Create table body
vs.parm %>%
  tbl_strata(
    strata = TRT01A,
    .tbl_fun = ~.x %>%
      tbl_summary(
        by = ANLS_VAR,
        include = c(starts_with("VIS")),
        type = c(starts_with("VIS")) ~ "continuous2",
        statistic = c(starts_with("VIS")) ~ c("{N_nonmiss}", "{mean} ({sd})", "{median}", "{min}, {max}"),
        digits = list(all_continuous() ~ c(1, 2, 3, 2, 1, 1)),
        label = vis.nam,
        missing = "no") %>%
      # Update Stat Labels
      add_stat_label(
        label = list(all_continuous() ~ c("n", "MEAN (SD)", "MEDIAN", "MIN, MAX"))) %>%
      # Update header
      modify_header(
        label ~ "Visit",
        all_stat_cols() ~ "**{level}**") %>%
      # Remove default footnote
      remove_footnote_header(columns = all_stat_cols()),
    .header = "**{strata}** <br>(N = {n})"
  )

Below is a screenshot of the output. There are couple of issues I am having.

  1. The N counts for header are driven by both AVAL & CHG records. These are doubled. (yellow highlights below)
  2. Any way to suppress the warnings on Baseline related to CHG. I understand these are genuine & it's ok if we can't.
  3. What is the best way to make the purple boxed part in screenshot blank? Thinking to use modify_table_body - not sure if there's a better way.

enter image description here


Solution

  • You can make this work with the code you've written, and I'll also show you how I made a similar table taking a slightly different approach.

    1. Using the code you've written, you can "fix" the baseline change values with a call to modify_table_body(~.x |> dplyr::mutate(stat_2 = ifelse(variable == "BASELINE", NA, stat_2)), and to "fix" the doubling of the Ns in the header you can use .header = "**{strata}** <br>(N = {n/2})".

    2. I've created a similar table in the past, and I opted to build one table for AVAL and one table for CHG, then merge them. In the example below, it's a slightly different table because instead of a single lab measure being summarized, it creates a very long table with one section per lab.

    library(gtsummary)
    library(dplyr)
    theme_gtsummary_compact()
    
    # first create df that is one line per subject
    df_adlb <-
      pharmaverseadam::adlb |> 
      filter(.by = c(USUBJID, VISIT), LBTESTCD == "ALB", row_number() == 1L, grepl("SCREENING|WEEK", VISIT)) |>
      tidyr::pivot_wider(
        id_cols = c(USUBJID, ARM, LBTESTCD, LBTEST),
        names_from = VISIT,
        values_from = c("AVAL", "CHG")
      )
    
    # create a table for the observed values at each visit
    tbl_aval <-
      df_adlb |> 
      select(ARM, LBTEST, starts_with("AVAL_")) |> 
      rename_with(~stringr::str_remove(., "^AVAL_")) |> 
      tbl_strata_nested_stack(
        strata = LBTEST,
        ~ .x |> 
          tbl_summary(
            by = ARM,
            type = all_continuous() ~ "continuous2",
            statistic = all_continuous() ~ c("{length}", "{mean} ({sd})", "{median}", "{min}, {max}"),
            digits = all_continuous() ~ c(length = 0, 
                                          mean = 2, 
                                          sd = 2, 
                                          median = 2, 
                                          min = 2, 
                                          max = 2),
            label = as.list(names(.x)) |> setNames(names(.x)),
            missing = "no"
          )
      ) 
    
    # create a table for the change values at each visit
    tbl_chg <-
      df_adlb |> 
      select(ARM, LBTEST, starts_with("CHG_")) |> 
      rename_with(~stringr::str_remove(., "^CHG_")) |> 
      tbl_strata_nested_stack(
        strata = LBTEST,
        ~ .x |> 
          tbl_summary(
            by = ARM,
            type = all_continuous() ~ "continuous2",
            statistic = all_continuous() ~ c("{length}", "{mean} ({sd})", "{median}", "{min}, {max}"),
            digits = all_continuous() ~ c(length = 0, 
                                          mean = 2, 
                                          sd = 2, 
                                          median = 2, 
                                          min = 2, 
                                          max = 2),
            include = -"SCREENING 1",
            label = as.list(names(.x)) |> setNames(names(.x)),
            missing = "no"
          )
      ) 
    
    # merge tables together and do some final styling
    t_lbt01 <-
      list(tbl_aval, tbl_chg) |> 
      tbl_merge(tab_spanner = FALSE) |> 
      modify_spanning_header(all_stat_cols() ~ "**{level}**  \n(N = {n})") |> 
      modify_header(
        all_stat_cols() & ends_with("_1") ~ "Value at Visit", # after the merge, values from the first table end with `_1`
        all_stat_cols() & ends_with("_2") ~ "Change from Baseline", # after the merge, values from the first table end with `_2`
        label = ""
      ) |> 
      modify_table_body(
        ~ .x |> 
          dplyr::relocate(
            c(starts_with("stat_1"), starts_with("stat_2"), starts_with("stat_3")), 
            .after = "label"
          ) |> 
          mutate(label = ifelse(label == "length", "n", label))
      )
    t_lbt01
    

    enter image description here