r formatting gtsummary

Add the observation count to variable labels in gtsummary


I'm using gtsummary for my project, and so far I include the missing values as a separate row for each variable, through the missing argument of tbl_summary.

However, I would also like each variable's label to show the total number of observations for that variable.

I tried both adding "n = {n}" to the label list I pass to tbl_summary and using add_stat_label(label = "n = {n}"), but in both cases the placeholder {n} is not interpreted as such, and the final table is not what I'm looking for.

I know I could add a custom stat column, but I would prefer not to increase the number of columns.

Is there any built-in function in gtsummary that can achieve this?

A (dys)functional example :

library(tidyverse)
library(gtsummary)
library(gt)

# Simulated data set of 100 rows used to illustrate the table.
# No seed is set, so the drawn values differ from one run to the next.
database <- data.frame(
  INDIV_AGE = rnorm(100, mean = 50, sd = 4),
  INDIV_GENDER = rbinom(100, size = 1, prob = 0.6),
  INDIV_ETHNICS = sample(
    c("North America", "Western Europe", "Africa", "Eastern Europe", "Asia", "Other"),
    size = 100,
    replace = TRUE,
    prob = c(0.3, 0.2, 0.4, 0.02, 0.01, 0.07)
  ),
  INDIV_ECOGRP = sample(
    c(1, 2, 3, 4),
    size = 100,
    replace = TRUE,
    prob = c(0.6, 0.1, 0.2, 0.1)
  ),
  ENV_POLLEVEL = rpois(100, lambda = 4),
  ENV_FLOODPROFILE = sample(
    c("Low", "Intermediate", "High", "Extreme"),
    size = 100,
    replace = TRUE,
    prob = c(0.1, 0.65, 0.2, 0.05)
  )
)

# Replace a random 10% of the entries of every column with NA. Assigning
# through `database[]` keeps the object a data frame instead of turning it
# into the plain list returned by lapply().
database[] <- lapply(database, function(x) {
  x[sample(seq_along(x), 0.1 * length(x))] <- NA
  x
})



# Summary table of `database` split by INDIV_GENDER, with a "Missing data"
# row requested for every variable (missing = "always").
a <- tbl_summary(
  database,
  by = INDIV_GENDER,
  include = everything(),
  type = INDIV_AGE ~ "continuous",
  statistic = list(
    all_continuous() ~ "{median} [{p25}-{p75}]",
    all_categorical() ~ "{n} ({p}%)"
  ),
  missing = "always",
  missing_text = "Missing data",
  missing_stat = "{N_miss} ({p_miss}%)"
) |>
  # Blank label header; the two by-group columns are headed No / Yes.
  modify_header(
    label = "",
    stat_1 = "**No**\nN={n}",
    stat_2 = "**Yes**\nN={n}"
  ) |>
  modify_spanning_header(all_stat_cols() ~ "**Gender**") |>
  add_p() |>
  bold_p() |>
  # Attempt to show the per-variable count in the label. This is the part
  # that does not work: the "{n}" placeholder is not interpolated here.
  add_stat_label(label = "n = {n}")

print(a)

It produces something like this : Current output

But I would like something like this, with the corresponding numbers replacing the {n}: Expected output

With this sample code, NA is randomly generated at 10% of the total so it's not really interesting, but in real data, NA differs between variables

Thank you very much !

Sigil


Solution

  • I believe this is what you want. It can be achieved, for example, by adding the counts with add_n() and then merging them into the label with modify_table_body:

    library(tidyverse)
    library(gtsummary)
    library(gt)
    
    # Fix the random seed so the simulated data are the same on every run.
    set.seed(1)

    # Simulated data set of 100 rows used to illustrate the table.
    database <- data.frame(
      INDIV_AGE = rnorm(100, mean = 50, sd = 4),
      INDIV_GENDER = rbinom(100, size = 1, prob = 0.6),
      INDIV_ETHNICS = sample(
        c("North America", "Western Europe", "Africa", "Eastern Europe", "Asia", "Other"),
        size = 100,
        replace = TRUE,
        prob = c(0.3, 0.2, 0.4, 0.02, 0.01, 0.07)
      ),
      INDIV_ECOGRP = sample(
        c(1, 2, 3, 4),
        size = 100,
        replace = TRUE,
        prob = c(0.6, 0.1, 0.2, 0.1)
      ),
      ENV_POLLEVEL = rpois(100, lambda = 4),
      ENV_FLOODPROFILE = sample(
        c("Low", "Intermediate", "High", "Extreme"),
        size = 100,
        replace = TRUE,
        prob = c(0.1, 0.65, 0.2, 0.05)
      )
    )

    # Replace a random 10% of the entries of every column with NA. Assigning
    # through `database[]` keeps the object a data frame instead of turning it
    # into the plain list returned by lapply().
    database[] <- lapply(database, function(x) {
      x[sample(seq_along(x), 0.1 * length(x))] <- NA
      x
    })
    
    # Summary table of `database` split by INDIV_GENDER, with a "Missing data"
    # row requested for every variable. add_n() contributes an `n` column,
    # which is then merged into the row labels and dropped.
    tbl_summary(
      database,
      by = INDIV_GENDER,
      include = everything(),
      type = INDIV_AGE ~ "continuous",
      statistic = list(
        all_continuous() ~ "{median} [{p25}-{p75}]",
        all_categorical() ~ "{n} ({p}%)"
      ),
      missing = "always",
      missing_text = "Missing data",
      missing_stat = "{N_miss} ({p_miss}%)"
    ) |>
      # Blank label header; the two by-group columns are headed No / Yes.
      modify_header(
        label = "",
        stat_1 = "**No**\nN={n}",
        stat_2 = "**Yes**\nN={n}"
      ) |>
      modify_spanning_header(all_stat_cols() ~ "**Gender**") |>
      add_p() |>
      add_n() |>
      modify_table_body(
        \(body) {
          body |>
            mutate(
              # Rows whose `n` is NA keep their label as is; every other row
              # gets ", n=<count>" appended to its label.
              label = ifelse(is.na(n), label, paste0(label, ", n=", n))
            ) |>
            # The count now lives in the label, so the `n` column is removed.
            select(-n)
        }
      )
    

    giving

    out