I have a data frame with the following structure:
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 proportion index cumulative_proportion
1 0 -1 0 0 0 -1 1 1 0 1 0 0 0.28423 1 0.28423
2 0 -1 0 0 0 -1 1 1 1 1 0 0 0.20511 2 0.48934
3 1 -1 0 0 0 -1 1 1 0 1 0 0 0.05751 3 0.54685
4 0 -1 0 0 1 -1 1 1 0 1 0 0 0.02388 4 0.57073
5 0 -1 0 0 0 -1 1 1 0 1 1 0 0.02217 5 0.59290
6 0 -1 0 0 0 -1 1 1 0 1 0 1 0.02098 6 0.61388
I'd like to create a visualization using `geom_tile()`, where the variables (`x1`–`x12`) are on the y-axis and the x-axis is based on the categorical `index`, with the fill determined by whether each variable is -1, 0, or 1. The issue is that, at the same time, the x-axis should show the `cumulative_proportion` variable and the tiles should be scaled based on `proportion`. Here's a non-ggplot example, some code that constructs a non-scaled version, and the example data frame:
# Reshape from wide to long: the three id columns are kept on every row and
# each remaining column becomes a (variable, value) pair.
plot.df <- reshape2::melt(
  plot.df,
  id.vars = c("index", "proportion", "cumulative_proportion")
)

# Unscaled baseline: one equal-width tile per (index, variable) cell,
# filled by the cell's value treated as a category.
ggplot(
  data = plot.df,
  mapping = aes(x = index, y = variable, fill = as.factor(value))
) +
  geom_tile()
# Example data in wide format (dput() output), 10 rows:
#   x1..x12               - indicator columns taking the values -1, 0 or 1
#   proportion            - share associated with each row
#   index                 - row identifier, 1 to 10
#   cumulative_proportion - running sum of `proportion` in index order
# Note: the expression is not assigned here; assign it (e.g. to `plot.df`)
# before running code that refers to the data frame.
structure(list(x1 = c(0, 0, 1, 0, 0, 0, 1, 0, 0, 1), x2 = c(-1,
-1, -1, -1, -1, -1, -1, -1, -1, -1), x3 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 1), x4 = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0), x5 = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0), x6 = c(-1, -1, -1, -1, -1, -1, -1,
-1, -1, -1), x7 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), x8 = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1), x9 = c(0, 1, 0, 0, 0, 0, 0, 0, 0,
0), x10 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1), x11 = c(0, 0, 0, 0,
1, 0, 0, 0, 0, 0), x12 = c(0, 0, 0, 0, 0, 1, 1, 0, 1, 0), proportion = c(0.28423,
0.20511, 0.05751, 0.02388, 0.02217, 0.02098, 0.0165, 0.01383,
0.01185, 0.01013), index = 1:10, cumulative_proportion = c(0.28423,
0.48934, 0.54685, 0.57073, 0.5929, 0.61388, 0.63038, 0.64421,
0.65606, 0.66619)), row.names = c(NA, -10L), class = "data.frame")
One option would be to switch to `geom_rect()`, which requires some additional data wrangling to compute or set the positions of the rectangles:
library(ggplot2)
# Spell out `warn.conflicts` instead of relying on partial argument matching.
library(dplyr, warn.conflicts = FALSE)

# Compute the rectangle corners needed by geom_rect().
# Expects `plot.df` in long format with the columns `variable`, `value`,
# `index` and `cumulative_proportion`.
plot.df <- plot.df |>
  mutate(
    # Each variable gets a band of height 1 centred on its integer position,
    # so the rectangles line up with the discrete y-axis labels.
    ymin = as.numeric(factor(variable)) - 0.5,
    ymax = as.numeric(factor(variable)) + 0.5
  ) |>
  mutate(
    # Within each variable, a rectangle runs from the previous cumulative
    # proportion (0 for the first index) to the current one. `order_by`
    # makes "previous" mean "previous index" regardless of row order.
    xmin = lag(cumulative_proportion, default = 0, order_by = index),
    xmax = cumulative_proportion,
    .by = variable
  )

ggplot(plot.df, aes(y = variable, fill = as.factor(value))) +
  # NOTE(review): presumably added first so the y scale is set up as discrete
  # before geom_rect() supplies numeric ymin/ymax; confirm before removing.
  scale_y_discrete() +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    color = "white"
  )