I can try to create this kind of complex nested structure table using gt, where each treatment contains multiple pieces of information. However, it seems relatively difficult to achieve this using gtsummary. Below are the code and output.
# Load necessary packages
library(dplyr)
library(tidyr)
library(gt)
library(pharmaverseadam) # Package containing adae dataset
# Load data
data("adae")
data("adsl")
# Step 1: Calculate exposure time for each patient by treatment group, SOC, and PT
# First, identify all unique combinations of USUBJID, TRT01A, AESOC, AEDECOD
adsl <- pharmaverseadam::adsl %>%
select(USUBJID, TRTSDT, TRTEDT)
all_combinations <- adae %>%
select(USUBJID, TRT01A) %>%
left_join(adsl, by = "USUBJID") %>%
distinct() %>%
# Cross join with all SOC/PT combinations
crossing(
adae %>% select(AESOC, AEDECOD) %>% distinct()
)
# Get the actual AEs that occurred
# Modified to handle missing AENDT values
actual_aes <- adae %>%
select(USUBJID, TRT01A, AESOC, AEDECOD, ASTDT, AENDT) %>%
# Ensure start dates are valid
filter(!is.na(ASTDT)) %>%
# Join with ADSL to get TRTEDT for handling missing AENDT
left_join(adsl %>% select(USUBJID, TRTEDT), by = "USUBJID") %>%
# Replace missing AENDT with TRTEDT
mutate(AENDT = if_else(is.na(AENDT), TRTEDT, AENDT)) %>%
# Remove TRTEDT column as it's no longer needed
select(-TRTEDT)
# Calculate exposure time
exposure_data <- all_combinations %>%
# Left join to identify which combinations actually had AEs
left_join(
actual_aes,
by = c("USUBJID", "TRT01A", "AESOC", "AEDECOD")
) %>%
# Calculate exposure time based on whether AE occurred or not
mutate(
had_ae = !is.na(ASTDT),
exposure_days = case_when(
had_ae == TRUE ~ as.numeric(difftime(AENDT, ASTDT, units = "days")) + 1,
had_ae == FALSE ~ as.numeric(difftime(TRTEDT, TRTSDT, units = "days")) + 1 + 112
),
exposure_years = exposure_days / 365.25 # Convert to years
)
# Step 2: Count AEs by treatment group, SOC, and PT
ae_counts_pt <- exposure_data %>%
group_by(TRT01A, AESOC, AEDECOD) %>%
summarise(
n_with_ae = sum(had_ae, na.rm = TRUE), # Count of patients with this specific AE
total_patients = n_distinct(USUBJID), # Total patients in this treatment group
total_exposure_years = sum(exposure_years, na.rm = TRUE), # Total exposure years
.groups = "drop"
) %>%
mutate(
percent = 100 * n_with_ae / total_patients,
# Incidence rate per 100 patient-years
incidence_rate = 100 * n_with_ae / total_exposure_years
)
# Step 3: Calculate SOC level statistics
ae_counts_soc <- ae_counts_pt %>%
group_by(TRT01A, AESOC) %>%
summarise(
n_with_ae = sum(n_with_ae), # Sum of all PTs within this SOC
total_patients = first(total_patients), # Same for all PTs within treatment
total_exposure_years = sum(total_exposure_years), # Sum exposure years
.groups = "drop"
) %>%
mutate(
percent = 100 * n_with_ae / total_patients,
incidence_rate = 100 * n_with_ae / total_exposure_years
)
# Step 4: Calculate total counts across all treatments for sorting
# Calculate PT totals across all treatments
pt_total_counts <- ae_counts_pt %>%
group_by(AESOC, AEDECOD) %>%
summarise(
pt_total = sum(n_with_ae),
.groups = "drop"
)
# Calculate SOC totals across all treatments
soc_total_counts <- ae_counts_soc %>%
group_by(AESOC) %>%
summarise(
soc_total = sum(n_with_ae),
.groups = "drop"
)
# Join with the PT data and sort
ae_sorted_pt <- ae_counts_pt %>%
left_join(pt_total_counts, by = c("AESOC", "AEDECOD")) %>%
left_join(soc_total_counts, by = "AESOC") %>%
# Add more significant indentation to PT (4 spaces)
mutate(TERM = paste0(" ", AEDECOD))
# Join with the SOC data and sort
ae_sorted_soc <- ae_counts_soc %>%
left_join(soc_total_counts, by = "AESOC") %>%
# Use SOC as the term (no indentation)
mutate(TERM = AESOC)
# Combine PT and SOC data and keep the sorting columns
ae_combined <- bind_rows(
ae_sorted_soc %>% mutate(level = "SOC"),
ae_sorted_pt %>% mutate(level = "PT")
)
# Create a sorting key for each SOC based on total count
soc_order <- soc_total_counts %>%
arrange(desc(soc_total), AESOC) %>%
mutate(soc_order = row_number())
# Add the SOC order to the combined data
ae_combined <- ae_combined %>%
left_join(soc_order, by = "AESOC")
# Sort the data:
# 1. First by SOC total (using the soc_order)
# 2. Then by level (SOC before PT)
# 3. For PTs within a SOC, sort by PT total
# 4. If PT totals are equal, sort alphabetically
ae_combined <- ae_combined %>%
arrange(
soc_order, # Sort by SOC total (descending)
desc(level == "SOC"), # SOC before PT
desc(pt_total), # Sort by PT total count (for PT rows)
TERM # If counts are equal, sort alphabetically
)
# Step 5: Reshape the data to create a nested structure with statistics under each treatment
# First, remove the sorting columns before pivoting
ae_combined_clean <- ae_combined %>%
select(-pt_total, -soc_order)
ae_nested <- ae_combined_clean %>%
# Pivot to create a nested structure
pivot_wider(
id_cols = c(AESOC, TERM, level),
names_from = TRT01A,
values_from = c(n_with_ae, percent, total_exposure_years, incidence_rate),
names_sep = "_" # This creates column names like "n_with_ae_Placebo"
)
# Get unique treatment groups
treatment_groups <- unique(ae_counts_pt$TRT01A)
# Create column names for each treatment group and statistic
col_names <- list()
for (trt in treatment_groups) {
col_names[[paste0("n_with_ae_", trt)]] <- "n"
col_names[[paste0("percent_", trt)]] <- "%"
col_names[[paste0("total_exposure_years_", trt)]] <- "Time at Risk"
col_names[[paste0("incidence_rate_", trt)]] <- "Incidence Rate"
}
# Create a gt table with nested structure
ae_table <- ae_nested %>%
# Remove AESOC column as it's redundant now
select(-AESOC) %>%
gt() %>%
tab_header(
title = "Adverse Events by System Organ Class and Preferred Term"
) %>%
cols_label(
TERM = "SOC/Preferred Term"
) %>%
# Add column labels
cols_label(.list = col_names) %>%
# Hide the level column
cols_hide(columns = "level")
# Dynamically add spanners for each treatment group
for (trt in treatment_groups) {
ae_table <- ae_table %>%
tab_spanner(
label = trt,
columns = starts_with(c(
paste0("n_with_ae_", trt),
paste0("percent_", trt),
paste0("total_exposure_years_", trt),
paste0("incidence_rate_", trt)
))
)
}
# Continue with formatting
ae_table <- ae_table %>%
# Format numbers
fmt_number(
columns = starts_with("percent_"),
decimals = 1
) %>%
fmt_number(
columns = starts_with("total_exposure_years_"),
decimals = 1
) %>%
fmt_number(
columns = starts_with("incidence_rate_"),
decimals = 2
) %>%
# Style the table
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = level == "SOC"
)
) %>%
tab_style(
style = cell_text(indent = px(20)),
locations = cells_body(
columns = TERM,
rows = level == "PT"
)
) %>%
tab_options(
row_group.background.color = "lightgrey"
) %>%
cols_align(
align = "left",
columns = TERM
) %>%
cols_align(
align = "right",
columns = -TERM)
ae_table
I would like to know how to achieve this using gtsummary. Thank you.
It's a complex table, but each part of it is straightforward enough.
First note that gtsummary would typically put statistics like n and percent in the same cell. For a table like this, we'll need to build a table each summary statistic then merge them all together.
The general outline to follow here is
library(cards)
library(gtsummary)
library(tidyverse)
################# 1. Prepare data
adae <- pharmaverseadam::adae |>
mutate(TRT01A = factor(TRT01A)) |>
filter(AESOC %in% unique(AESOC)[1:2]) |>
slice(.by = "AESOC", 1)
adsl <- pharmaverseadam::adsl |>
dplyr::filter(TRT01A != "Screen Failure") |>
mutate(TRT01A = factor(TRT01A))
# first some housekeeping, make a data frame with all subjects, soc, and ae
df_all_soc_ae <-
adsl[c("USUBJID", "TRT01A")] |>
mutate(
data =
adae[c("AESOC", "AEDECOD")] |>
distinct() |>
list()
) |>
unnest(data)
################# 2. Create an ARD
# Now create the ARD for rates of AE
ard_ae_rates <-
adae |>
ard_stack_hierarchical(
by = TRT01A,
variables = c(AESOC, AEDECOD),
id = USUBJID,
denominator = pharmaverseadam::adsl
)
# now create ARD with incidenence rates
ard_incidence_ae <-
adae |>
# counting number of AEs
summarise(
.by = c(USUBJID, TRT01A, AESOC, AEDECOD),
count = length(USUBJID)
) |>
right_join(
df_all_soc_ae,
by = c("USUBJID", "TRT01A", "AESOC", "AEDECOD")
) |>
mutate(
# first give a count of zero to those without an AE
count = coalesce(count, 0L),
# for simplicicty, assume everyone is followed for half a year
followup = 0.5
) |>
cardx::ard_incidence_rate(
time = "followup",
count = "count",
by = TRT01A,
strata = c(AESOC, AEDECOD),
unit_label = "years"
) |>
# keep the incidence rate and the total followup time
dplyr::filter(stat_name %in% c("estimate", "tot_person_time"))
# now repeat for SOC
ard_incidence_soc <-
adae |>
# counting number of AEs
summarise(
.by = c(USUBJID, TRT01A, AESOC),
count = length(USUBJID)
) |>
right_join(
distinct(df_all_soc_ae, USUBJID, TRT01A, AESOC),
by = c("USUBJID", "TRT01A", "AESOC")
) |>
mutate(
# first give a count of zero to those without an AE
count = coalesce(count, 0L),
# for simplicicty, assume everyone is followed for half a year
followup = 0.5
) |>
cardx::ard_incidence_rate(
time = "followup",
count = "count",
by = TRT01A,
strata = AESOC,
unit_label = "years"
) |>
# keep the incidence rate and the total followup time
dplyr::filter(stat_name %in% c("estimate", "tot_person_time"))
# next make the incidence ARDs look like the AE rates
ard_incidence_ae2 <-
ard_incidence_ae |>
dplyr::select(-all_ard_variables()) |>
rename(variable = group3, variable_level = group3_level)
ard_incidence_soc2 <-
ard_incidence_soc |>
dplyr::select(-all_ard_variables()) |>
rename(variable = group2, variable_level = group2_level)
################# 3. Build tables from ARD
tbl_n <-
tbl_ard_hierarchical(
ard_ae_rates,
by = "TRT01A",
variables = c("AESOC", "AEDECOD"),
statistic = ~"{n}"
) |>
remove_footnote_header(everything()) |>
modify_spanning_header(all_stat_cols() ~ "**{level}**") |>
modify_header(all_stat_cols() ~ "n")
tbl_p <-
tbl_ard_hierarchical(
ard_ae_rates,
by = "TRT01A",
variables = c("AESOC", "AEDECOD"),
statistic = ~"{p}%"
) |>
remove_footnote_header(everything()) |>
modify_spanning_header(all_stat_cols() ~ "**{level}**") |>
modify_header(all_stat_cols() ~ "%")
tbl_inc <-
bind_ard(
ard_incidence_ae2,
ard_incidence_soc2
) |>
tbl_ard_hierarchical(
by = "TRT01A",
variables = c("AESOC", "AEDECOD"),
statistic = ~"{estimate}"
) |>
remove_footnote_header(everything()) |>
modify_spanning_header(all_stat_cols() ~ "**{level}**") |>
modify_header(all_stat_cols() ~ "Incidence per 100PY")
tbl_followup <-
bind_ard(
ard_incidence_ae2,
ard_incidence_soc2
) |>
tbl_ard_hierarchical(
by = "TRT01A",
variables = c("AESOC", "AEDECOD"),
statistic = ~"{tot_person_time}"
) |>
remove_footnote_header(everything()) |>
modify_spanning_header(all_stat_cols() ~ "**{level}**") |>
modify_header(all_stat_cols() ~ "Tot. Follow-up")
################# 4. Merge to create final table
list(tbl_n, tbl_p, tbl_inc, tbl_followup) |>
tbl_merge(tab_spanner = FALSE) |>
# reorder columns
modify_table_body(
\(x) {
column_order <- x |> select(all_stat_cols()) |> names() |> sort()
relocate(x, all_of(column_order), .after = "label")
}
) |>
modify_abbreviation("PY = Person-years")