I have created the following forest plot (Plot 1), but I would like to add labels/titles in line and above the y-axis values (i.e., "Analyte") and above the p-values included on each of the panels/forest plot (i.e., "p-value"). In plot 1, I have managed to add a the title/label but am not able to position it in line with the values of "Assay" on the y-axis. Plot 2 below shows the desired output.
Plot 1 (Current Output):
As an example, the following plot (Plot 2) demonstrates how I would like the title/labels to be displayed (i.e., I would like them to appear in line with the relevant information analytes/p-values).
Plot 2 (Desired Layout):
Question: How can I modify my existing code for plot 1 to create plot 2?
Note that there is one value of QM_P_trend for each comparison (AvsB and CvsB) of each Assay/protein, but it is repeated within the data for each level of tertile.
Thank you!
Below is the R code I used to generate Plot 1, along with the data:
###################################################################################
# CREATE DATA -------------------------------------------------------------------#
###################################################################################
data <- structure(list(Carrier_comparison = c("AvsB", "AvsB", "AvsB", "CvsB",
"CvsB", "CvsB", "AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB",
"AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB"), Tertile = structure(c(3L,
2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L,
1L), levels = c("Tertile 3 (63-70)", "Tertile 2 (55-62)", "Tertile 1 (40-54)"
), class = "factor"), Assay = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("ADGRV1",
"CD63", "PRDX3"), class = "factor"), BETA = c(0.0536645935253193,
0.00934331381571756, -0.0502503436470944, 0.00816704351606194,
0.0946005745129161, -0.0749759598409221, -0.0600444465527413,
0.0601560795464937, -0.0640637292430954, 0.0219960172083058,
-0.148417379449254, -0.0346517428785544, 0.118618573404726, 0.0822558056705285,
0.15602529221275, -0.0326073326703257, -0.054029294502272, -0.0346020391169203
), SE = c(0.11057661927178, 0.113129029834029, 0.126265725049744,
0.0823378264579776, 0.0866601604863716, 0.100479295994743, 0.102816408820442,
0.106832143822942, 0.117433994065687, 0.0774019365360251, 0.0821098859062016,
0.0932959318751491, 0.102738955735482, 0.108841714570024, 0.118096903894084,
0.0781497668089097, 0.0811784837707031, 0.0931611680323888),
P_uncorrected = c(0.627642070133785, 0.934211300388773, 0.690846714020907,
0.921016957706137, 0.275432313055544, 0.455911706235102,
0.559432213622232, 0.573597978019841, 0.585625011701019,
0.776350390578681, 0.0711164128651439, 0.710460149142023,
0.248707272854769, 0.450125272681661, 0.187040523394829,
0.676619698998927, 0.505916944765656, 0.71045895979309),
QM_P_trend = c(0.545836080293994, 0.545836080293994, 0.545836080293994,
0.7479209500248, 0.7479209500248, 0.7479209500248, 0.867112096895725,
0.867112096895725, 0.867112096895725, 0.57166587344432, 0.57166587344432,
0.57166587344432, 0.880298751128355, 0.880298751128355, 0.880298751128355,
0.949181439539369, 0.949181439539369, 0.949181439539369)), row.names = c(NA,
-18L), class = "data.frame")
###################################################################################
# CREATE PLOTS-------------------------------------------------------------------#
###################################################################################
# Packages
library(tidyverse) # for maipulating data frames
# Amended forest plot function
forestplot_trend <- function (df, name = name, estimate = estimate, se = se, pvalue = NULL,
ptrend = NULL, colour = NULL, shape = NULL, logodds = FALSE,
psignif = 0.05, facet_var = NULL, ci = 0.95, ...)
{
# Ensure input is a data frame and logodds is a logical value
stopifnot(is.data.frame(df))
stopifnot(is.logical(logodds))
# Convert input column names to quosures for tidy evaluation
name <- enquo(name)
estimate <- enquo(estimate)
se <- enquo(se)
pvalue <- enquo(pvalue)
ptrend <- enquo(ptrend)
facet_var <- enquo(facet_var)
colour <- enquo(colour)
shape <- enquo(shape)
# Capture additional arguments passed to the function
args <- list(...)
# Compute the confidence interval constant (Z-score for normal distribution)
const <- stats::qnorm(1 - (1 - ci)/2)
# Prepare data frame: factorize `name` column, compute confidence intervals, and format labels
df <- df %>%
dplyr::mutate(
`:=`(!!name, factor(!!name, levels = !!name %>% unique() %>% rev(), ordered = TRUE)), # Reverse factor levels
.xmin = !!estimate - const * !!se, # Lower CI bound
.xmax = !!estimate + const * !!se, # Upper CI bound
.filled = TRUE, # Default filled state
.label = sprintf("%.2f", !!estimate) # Format estimate values as strings
)
# If logodds is TRUE, exponentiate the estimates and confidence intervals
if (logodds) {
df <- df %>% mutate(
.xmin = exp(.data$.xmin),
.xmax = exp(.data$.xmax),
`:=`(!!estimate, exp(!!estimate))
)
}
# If p-value is provided, mark significant values based on threshold (psignif)
if (!rlang::quo_is_null(pvalue)) {
df <- df %>% dplyr::mutate(.filled = !!pvalue < !!psignif)
}
# Initialize ggplot with estimate on x-axis and name on y-axis and ptrend values
g <- ggplot2::ggplot(df, aes(x = !!estimate, y = !!name)) +
geom_text(
aes(x = Inf, y = !!name, label = sprintf("%.3f", !!ptrend)),
hjust = 1.3, vjust = 0.5, family = "Arial"
)
# Apply a forest plot theme and add background elements
g <- g + theme_forest() + scale_colour_ng_d() + scale_fill_ng_d() +
geom_stripes() +
geom_vline(xintercept = ifelse(logodds, 1, 0), linetype = "solid", size = 0.4, colour = "black") # Reference line
# Adjust the x-axis to give more space for the labels
g <- g + coord_cartesian(xlim = c(min(df[[as_label(estimate)]]) - 0.5, max(df[[as_label(estimate)]]) + 0.5)) # Increase xlim range
# Add effect estimates with confidence intervals
g <- g + geom_effect(
ggplot2::aes(
xmin = .data$.xmin, xmax = .data$.xmax,
colour = !!colour, shape = !!shape, filled = .data$.filled
),
position = ggstance::position_dodgev(height = 0.5)
) +
ggplot2::scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
guides(colour = guide_legend(reverse = TRUE), shape = guide_legend(reverse = TRUE))
# Add optional title, subtitle, caption, x-label, and y-label if provided
if ("title" %in% names(args)) {
g <- g + labs(title = args$title)
}
if ("subtitle" %in% names(args)) {
g <- g + labs(subtitle = args$subtitle)
}
if ("caption" %in% names(args)) {
g <- g + labs(caption = args$caption)
}
if ("xlab" %in% names(args)) {
g <- g + labs(x = args$xlab)
}
if (!"ylab" %in% names(args)) {
args$ylab <- ""
}
g <- g + labs(y = args$ylab)
# Set axis limits if provided
if ("xlim" %in% names(args)) {
g <- g + coord_cartesian(xlim = args$xlim)
}
if ("ylim" %in% names(args)) {
g <- g + ylim(args$ylim)
}
# Adjust x-axis tick marks if provided and logodds is FALSE
if ("xtickbreaks" %in% names(args) & !logodds) {
g <- g + scale_x_continuous(breaks = args$xtickbreaks)
}
return(g)
}
# Create the forest plot function
forest_plot_data <- function(proteins, data, main_comparison) {
# Define the desired order for Carrier_comparison based on main_comparison
comparison_levels <- if (main_comparison == "C") {
c("CvsB", "AvsB")
} else {
c("AvsB", "CvsB")
}
# Filter data for selected proteins
data <- data %>%
filter(Assay %in% proteins)
# Ensure the Carrier_comparison is a factor with the desired level order
data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
# Plot
forestplot_trend(
df = data,
name = Assay, # Use combined label for y-axis
estimate = BETA,
se = SE,
pvalue = P_uncorrected,
ptrend = QM_P_trend, # Add QM_P_trend values
psignif = 0.01, # fill estimate if significant at this level
ci = 0.95,
logodds = FALSE,
xlab = "Beta (95% confidence intervals)",
ylab="Analyte",
colour = Tertile,
shape = Tertile,
position = position_dodge(width = 0.7) # Increase dodge width for better separation
) +
facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
theme(
text = element_text(family = "Arial"), # Change "Arial" to your preferred font
strip.text = element_text(size = 14, color = "black", hjust = 0.5, vjust = 5.5, margin = margin(t = 15, b = 15)),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
axis.title.x = element_text(size = 14, color = "black", vjust = 0.5),
axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.03, hjust = -5), # Rotated y-axis title
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(size = 12, color = "black"), # Customize legend labels
legend.key.size = unit(1.5, "lines")
)
}
# Create list of proteins
proteins <- levels(data$Assay)
# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins, data, main_comparison = "C")
CvsB_forest_plot
I managaged to align the y-axis title using the margin
argument instead of hjust
. hjust
did not work as expected, due to using the argument angle = 0
. This is because justification works relative to the text direction.
To add the p-value labels I then used geom_text
.
Plot produced:
Code used to produce the plot:
###################################################################################
# CREATE PLOTS-------------------------------------------------------------------#
###################################################################################
# Load necessary libraries
library(tidyverse)
# Amended forest plot function
forestplot_trend <- function (df, name = name, estimate = estimate, se = se, pvalue = NULL,
ptrend = NULL, colour = NULL, shape = NULL, logodds = FALSE,
psignif = 0.05, facet_var = NULL, ci = 0.95, ...)
{
# Ensure input is a data frame and logodds is a logical value
stopifnot(is.data.frame(df))
stopifnot(is.logical(logodds))
# Convert input column names to quosures for tidy evaluation
name <- enquo(name)
estimate <- enquo(estimate)
se <- enquo(se)
pvalue <- enquo(pvalue)
ptrend <- enquo(ptrend)
facet_var <- enquo(facet_var)
colour <- enquo(colour)
shape <- enquo(shape)
# Capture additional arguments passed to the function
args <- list(...)
# Compute the confidence interval constant (Z-score for normal distribution)
const <- stats::qnorm(1 - (1 - ci)/2)
# Prepare data frame: factorize `name` column, compute confidence intervals, and format labels
df <- df %>%
dplyr::mutate(
`:=`(!!name, factor(!!name, levels = !!name %>% unique() %>% rev(), ordered = TRUE)), # Reverse factor levels
.xmin = !!estimate - const * !!se, # Lower CI bound
.xmax = !!estimate + const * !!se, # Upper CI bound
.filled = TRUE, # Default filled state
.label = sprintf("%.2f", !!estimate) # Format estimate values as strings
)
# If logodds is TRUE, exponentiate the estimates and confidence intervals
if (logodds) {
df <- df %>% mutate(
.xmin = exp(.data$.xmin),
.xmax = exp(.data$.xmax),
`:=`(!!estimate, exp(!!estimate))
)
}
# If p-value is provided, mark significant values based on threshold (psignif)
if (!rlang::quo_is_null(pvalue)) {
df <- df %>% dplyr::mutate(.filled = !!pvalue < !!psignif)
}
# Initialize ggplot with estimate on x-axis and name on y-axis and ptrend values
g <- ggplot2::ggplot(df, aes(x = !!estimate, y = !!name))
# Apply a forest plot theme and add background elements
g <- g + theme_forest() + scale_colour_ng_d() + scale_fill_ng_d() +
geom_stripes() +
geom_vline(xintercept = ifelse(logodds, 1, 0), linetype = "solid", size = 0.4, colour = "black") # Reference line
# Adjust the x-axis to give more space for the labels
g <- g + coord_cartesian(xlim = c(min(df[[as_label(estimate)]]) - 0.5, max(df[[as_label(estimate)]]) + 0.5)) # Increase xlim range
# Add effect estimates with confidence intervals
g <- g + geom_effect(
ggplot2::aes(
xmin = .data$.xmin, xmax = .data$.xmax,
colour = !!colour, shape = !!shape, filled = .data$.filled
),
position = ggstance::position_dodgev(height = 0.5)
) +
ggplot2::scale_shape_manual(values = c(21L, 22L, 23L, 24L, 25L)) +
guides(colour = guide_legend(reverse = TRUE), shape = guide_legend(reverse = TRUE))
# Add optional title, subtitle, caption, x-label, and y-label if provided
if ("title" %in% names(args)) {
g <- g + labs(title = args$title)
}
if ("subtitle" %in% names(args)) {
g <- g + labs(subtitle = args$subtitle)
}
if ("caption" %in% names(args)) {
g <- g + labs(caption = args$caption)
}
if ("xlab" %in% names(args)) {
g <- g + labs(x = args$xlab)
}
if (!"ylab" %in% names(args)) {
args$ylab <- ""
}
g <- g + labs(y = args$ylab)
# Set axis limits if provided
if ("xlim" %in% names(args)) {
g <- g + coord_cartesian(xlim = args$xlim)
}
if ("ylim" %in% names(args)) {
g <- g + ylim(args$ylim)
}
# Adjust x-axis tick marks if provided and logodds is FALSE
if ("xtickbreaks" %in% names(args) & !logodds) {
g <- g + scale_x_continuous(breaks = args$xtickbreaks)
}
return(g)
}
# Create the forest plot function with added labels
forest_plot_data <- function(proteins, data, main_comparison) {
# Define the desired order for Carrier_comparison based on main_comparison
comparison_levels <- if (main_comparison == "C") {
c("CvsB", "AvsB")
} else {
c("AvsB", "CvsB")
}
# Filter data for selected proteins
data <- data %>%
filter(Assay %in% proteins)
# Ensure the Carrier_comparison is a factor with the desired level order
data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
# Plot
plot <- forestplot_trend(
df = data,
name = Assay, # Use combined label for y-axis
estimate = BETA,
se = SE,
pvalue = P_uncorrected,
ptrend = QM_P_trend, # Add QM_P_trend values
psignif = 0.01, # fill estimate if significant at this level
ci = 0.95,
logodds = FALSE,
xlab = "Beta (95% confidence intervals)",
ylab = "Analyte",
colour = Tertile,
shape = Tertile,
position = position_dodge(width = 0.7) # Increase dodge width for better separation
) +
facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
theme(
text = element_text(family = "Arial"), # Change "Arial" to your preferred font
strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 5.5, margin = margin(t = 18, b = 15)),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
axis.title.x = element_text(size = 14, color = "black", vjust = 0.5),
axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.00, margin = margin(r = -50)), # Rotated y-axis title
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(size = 12, color = "black"), # Customize legend labels
legend.key.size = unit(1.5, "lines")
)
# Add "p-value" label above p-values in each panel
plot <- plot +
geom_text(
data = data.frame(Carrier_comparison = comparison_levels,
label = "bolditalic('p') * bold('-value')"), # Correct expression for bold and italic
aes(x = Inf, y = Inf, label = label),
hjust = 1.1, vjust = 1.5, size = 4, family = "Arial", color = "black",
parse = TRUE # Use parse = TRUE to interpret the label as an expression
)
# Add QM_P_trend values aligned with Assay values on the y-axis
plot <- plot +
geom_text(
aes(x = Inf, y = Assay, label = sprintf("%.3f", QM_P_trend)), # Display QM_P_trend values
hjust = 1.3, vjust = 0.5, family = "Arial", size = 4, color = "black"
)
return(plot)
}
# Create list of proteins
proteins <- levels(data$Assay)
# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins, data, main_comparison = "C")
# Display the plot
CvsB_forest_plot