Problem:
I have a hierarchical data set from which I want to generate a grouped boxplot, with the outline colour mapped to one variable and the fill mapped to another. This is easy to do in ggplot2 (example provided), but I am unable to achieve the same result with plot_ly(). Note: I do not want to use ggplotly(), as it is considerably slower than plot_ly() with large datasets.
Bonus: how can I define the 'group_c' colours manually in the plot_ly() solution? In ggplot2 this can be done with scale_colour_manual().
Data:
# Build the example data set: 5 group_a values, 3 group_b sub-groups per
# group_a value, rows alternating between the two group_c levels (420 rows).
set.seed(42) # fix the RNG so the example data are identical on every run

group_a <- 900:904
group_b <- paste0(
  rep(group_a, each = 3), "_",
  rep(c("01", "02", "03"), times = length(group_a))
)
group_c <- rep(c("A", "B"), times = length(group_b))

# One consecutive date per group_a value, repeated for its 3 sub-groups.
# length.out must be length(group_a) so the dates line up with group_b.
group_date <- seq.Date(
  from = as.Date("2025-01-01"), by = "day", length.out = length(group_a)
)
group_date <- rep(group_date, each = 3)

# 14 random draws, repeated for every group_b x group_c combination.
x <- rep(runif(14), times = length(group_a) * 3 * 2)

# Create the dataframe
df <- data.frame(
  group_a = rep(group_a, each = 3 * 2 * 14),
  group_b = rep(group_b, each = 14 * 2),
  group_date = rep(group_date, each = 2 * 14),
  group_c = group_c, # shorter than the other columns; data.frame() recycles it
  x = x
)
ggplot method:
# Grouped boxplot: one box per group_c x group_b combination, positioned by
# date, filled by group_a and outlined by group_c.
ggplot(
  data = df,
  mapping = aes(
    x = group_date,
    y = x,
    group = interaction(group_c, group_b),
    fill = factor(group_a),
    colour = group_c
  )
) +
  geom_boxplot()
Do you mean something like this? To answer your bonus question, adjust the group_c colors in the `styles` list inside `transforms`. I could not figure out how to make the boxplots wider.
library(plotly)
library(dplyr)
# Simulated example data: 5 group_a values, each split into 3 group_b
# sub-groups; rows alternate between the two group_c levels.
n_sub <- 3   # group_b sub-groups per group_a value
n_lvl <- 2   # number of group_c levels
n_draw <- 14 # random draws, repeated across every group

group_a <- 900:904

# Sub-group ids of the form "<group_a>_<two-digit index>", e.g. "900_01".
group_b <- sprintf(
  "%d_%02d",
  rep(group_a, each = n_sub),
  rep(seq_len(n_sub), times = length(group_a))
)
group_c <- rep(c("A", "B"), times = length(group_b))

# One consecutive day per group_a value, repeated once per sub-group.
group_date <- rep(
  seq.Date(
    from = as.Date("2025-01-01"), by = "day", length.out = length(group_a)
  ),
  each = n_sub
)

x <- rep(runif(n_draw), times = length(group_a) * n_sub * n_lvl)

# Assemble the long-format data frame; group_c is shorter than the other
# columns and is recycled by data.frame().
df <- data.frame(
  group_a = rep(group_a, each = n_sub * n_lvl * n_draw),
  group_b = rep(group_b, each = n_draw * n_lvl),
  group_date = rep(group_date, each = n_lvl * n_draw),
  group_c = group_c,
  x = x
)
# Outline colour for each group_c level, applied through a groupby transform.
# Edit the colours here to control the group_c outlines manually.
group_c_outline <- list(
  type = "groupby",
  groups = ~group_c,
  styles = list(
    list(target = "A", value = list(line = list(color = "red"))),
    list(target = "B", value = list(line = list(color = "blue")))
  )
)

# Grouped boxplots by date: fill colour from group_a, trace name from group_b,
# with all gaps between boxes removed.
plot_ly(data = df) %>%
  add_boxplot(
    x = ~group_date,
    y = ~x,
    type = "box",
    name = ~group_b,
    color = ~factor(group_a),
    transforms = list(group_c_outline)
  ) %>%
  layout(boxmode = "group", boxgap = 0, boxgroupgap = 0)
You can use a color ramp to generate the outline colors from the unique group_c values:
library(plotly)
library(dplyr)
library(grDevices) # For colorRampPalette
# Simulated example data: 5 group_a values, each split into 3 group_b
# sub-groups; rows cycle through the three group_c levels.
n_sub <- 3   # group_b sub-groups per group_a value
n_draw <- 14 # random draws, repeated across every group
c_levels <- c("A", "B", "C")
n_lvl <- length(c_levels)

group_a <- 900:904

# Sub-group ids of the form "<group_a>_<two-digit index>", e.g. "900_01".
group_b <- sprintf(
  "%d_%02d",
  rep(group_a, each = n_sub),
  rep(seq_len(n_sub), times = length(group_a))
)
group_c <- rep(c_levels, times = length(group_b) / n_lvl)

# One consecutive day per group_a value, repeated once per sub-group.
group_date <- rep(
  seq.Date(
    from = as.Date("2025-01-01"), by = "day", length.out = length(group_a)
  ),
  each = n_sub
)

x <- rep(runif(n_draw), times = length(group_a) * n_sub * n_lvl)

# Assemble the long-format data frame; group_c is shorter than the other
# columns and is recycled by data.frame().
df <- data.frame(
  group_a = rep(group_a, each = n_sub * n_lvl * n_draw),
  group_b = rep(group_b, each = n_draw * n_lvl),
  group_date = rep(group_date, each = n_lvl * n_draw),
  group_c = group_c,
  x = x
)
# One outline colour per distinct group_c value, interpolated from red to blue.
unique_group_c <- unique(df$group_c)
color_palette <- colorRampPalette(c("red", "blue"))(length(unique_group_c))

# Build one groupby style entry that pairs a group_c value with its colour.
make_style <- function(level, outline) {
  list(target = level, value = list(line = list(color = outline)))
}

# Keep the list unnamed (USE.NAMES = FALSE) so it has the same shape as a
# list built by lapply() over indices.
styles_list <- mapply(
  make_style, unique_group_c, color_palette,
  SIMPLIFY = FALSE, USE.NAMES = FALSE
)
# Create the plot
# Groupby transform that applies the generated per-group_c outline styles.
group_c_outline <- list(
  type = "groupby",
  groups = ~group_c,
  styles = styles_list
)

# Create the plot: grouped boxplots by date, fill colour from group_a, trace
# name from group_b, with all gaps between boxes removed.
plot_ly(data = df) %>%
  add_boxplot(
    x = ~group_date,
    y = ~x,
    type = "box",
    name = ~group_b,
    color = ~factor(group_a),
    transforms = list(group_c_outline)
  ) %>%
  layout(boxmode = "group", boxgap = 0, boxgroupgap = 0)