Tags: r, ggplot2, colors, scale

Pass colors properly to manual scale in ggplot2 with multiple layers


I have a relatively complex ggplot2 plot with different layers: geom_line, geom_segment, geom_point and geom_text...

This is my MWE:

# Toy base-call sequence; each character becomes one x-axis tick label.
myseq <- "ACGTCAGTCGTAGCTAGCTAGCTGCATTATCGATCGATCGATCGA"
# Colours by position: A, C, G, T, X, and a fallback for any other character.
basecolors <- c("green", "blue", "black", "red", "magenta", "grey")
# Extra colours that the plot below appends to the manual colour scale.
aacolors <- c(
  "#E60A0A", "#E6E600", "#145AFF", "#FA9600", "#3232AA", "#00DCDC", "#EBEBEB",
  "#0F820F", "#C8C8C8", "#B45AB4", "#8282D2", "#DC9682", "#BEA06E"
)
set.seed(123)
xlabels <- unlist(strsplit(myseq, ""))
# seq_along() instead of 1:length(): stays empty for an empty sequence.
xbreaks <- seq_along(xlabels)
# One simulated signal per base and position. The rnorm() calls stay in the
# order A, C, G, T so the seeded draws are unchanged.
trace_df <- data.frame(
  time = xbreaks,
  A = rnorm(length(xlabels), mean = 10, sd = 2),
  C = rnorm(length(xlabels), mean = 5, sd = 1),
  G = rnorm(length(xlabels), mean = 3, sd = 1),
  T = rnorm(length(xlabels), mean = 8, sd = 3),
  call = xlabels
)
# Per-tick label colour: look each letter up in basecolors; any character other
# than A/C/G/T/X falls back to the sixth colour.
xcolors <- basecolors[match(xlabels, c("A", "C", "G", "T", "X"), nomatch = 6L)]
# Reshape to long format: the A/C/G/T columns are stacked into `signal`, with
# the originating column name stored in `base`.
trace_df <- as.data.frame(
  tidyr::pivot_longer(
    trace_df,
    cols = c(A, C, G, T),
    names_to = "base",
    values_to = "signal"
  )
)
#
# Annotation inputs: the region runs from position cdr3_st2 to cdr3_en2 and is
# cut into consecutive 3-position segments.
cdr3_aa <- "VWXYZ"
cdr3_st2 <- 7
cdr3_en2 <- 22
cdr3_call <- unlist(strsplit(cdr3_aa, ""))
# One column per segment. Row 1 holds the label, row 2 the value mapped to the
# segment/point colour, row 3 the value mapped to the label's text colour.
cdr3_mat <- matrix(
  c(
    "VEE", "WEE", "XEE", "YEE", "ZEE",
    "#E60A0A", "#E6E600", "#145AFF", "#FA9600", "#0F820F",
    "white", "black", "white", "black", "white"
  ),
  nrow = 3,
  byrow = TRUE
)
# Segment endpoints are shifted by 0.5 so they fall between integer positions.
df_arrows <- data.frame(
  x = seq(cdr3_st2, cdr3_en2 - 3, by = 3) - 0.5,
  xend = seq(cdr3_st2 + 3, cdr3_en2, by = 3) - 0.5,
  y = 18,
  yend = 18,
  label = cdr3_mat[1, ],
  color = cdr3_mat[2, ],
  text = cdr3_mat[3, ]
)
##
# Build the plot layer by layer. Every colour aesthetic here (`base` for the
# lines, `color` and `text` from df_arrows) is mapped through the single manual
# colour scale, i.e. treated as a category rather than as a literal colour.
P <- ggplot2::ggplot() +
  # Shaded band spanning positions cdr3_st2 to cdr3_en2, full panel height.
  ggplot2::annotate(
    "rect",
    xmin = xbreaks[cdr3_st2] - 0.5,
    xmax = xbreaks[cdr3_en2] - 0.5,
    ymin = -Inf,
    ymax = Inf,
    fill = "grey",
    alpha = 0.25
  ) +
  # Dashed separators at the start of every segment except the first.
  ggplot2::geom_vline(
    data = df_arrows[-1, ],
    mapping = ggplot2::aes(xintercept = x),
    color = "grey",
    linetype = 2,
    linewidth = 0.5
  ) +
  # One signal trace per base.
  ggplot2::geom_line(
    data = trace_df,
    mapping = ggplot2::aes(x = time, y = signal, group = base, colour = base),
    linewidth = 0.5
  ) +
  ggplot2::scale_color_manual(values = c(basecolors, aacolors)) +
  # Bottom axis shows the called letters; the secondary (top) axis shows the
  # position numbers.
  ggplot2::scale_x_continuous(
    breaks = xbreaks,
    labels = xlabels,
    expand = c(0, 1),
    sec.axis = ggplot2::sec_axis(~ ., breaks = xbreaks, labels = xbreaks)
  ) +
  ggplot2::scale_y_continuous(limits = c(0, 20)) +
  # Thick segment, a diamond point at its end, and a centred bold label for
  # each row of df_arrows.
  ggplot2::geom_segment(
    data = df_arrows,
    mapping = ggplot2::aes(x = x, xend = xend, y = y, yend = yend, color = color),
    linewidth = 8
  ) +
  ggplot2::geom_point(
    data = df_arrows,
    mapping = ggplot2::aes(x = xend, y = y, color = color),
    shape = 18,
    size = 8
  ) +
  ggplot2::geom_text(
    data = df_arrows,
    mapping = ggplot2::aes(x = (x + xend) / 2, y = y, label = label, color = text),
    fontface = "bold"
  ) +
  ggplot2::theme_light() +
  ggplot2::theme(
    # Bottom tick labels are coloured individually via xcolors.
    axis.text.x = ggtext::element_markdown(face = "bold", size = 12, color = xcolors),
    axis.text.x.top = ggplot2::element_text(size = 10, color = "grey", angle = 45, hjust = 0),
    axis.text.y = ggplot2::element_text(size = 12),
    axis.title = ggplot2::element_blank(),
    panel.grid.minor.x = ggplot2::element_blank(),
    panel.grid.minor.y = ggplot2::element_line(linetype = 2),
    legend.position = "none"
  )
# Draw the plot into test.pdf (width 10, height 3), then close the device so
# the file is finalised.
grDevices::pdf(
  file = "test.pdf",
  width = 10,
  height = 3
)
print(P)
grDevices::dev.off()

At first I was passing just basecolors to scale_color_manual, but I got the error:

Insufficient values in manual scale

Now every combination of colors I try there messes everything up. I need the colors of the lines in the plot to match the colors of the letters on the x-axis (those are correct), and the colors of the segments and of their text to be exactly as specified in df_arrows...

Currently the above code produces this instead:

fig

How should I pass the different color schemes properly? Thanks


Solution

  • As you already have columns in the df_arrows data frame that contain the colors for the segments, points and labels, a quick fix is to wrap those columns in I() (aka AsIs) inside the aes() mapping (note: this requires ggplot2 >= 3.5.0). The values are then interpreted as literal colors (similar to scale_color_identity/scale_fill_identity) rather than as "categories", so they no longer need to be added to the manual scale.

    library(ggplot2)

    # Same plot as in the question, with two changes to the colour handling:
    # the df_arrows colour columns are wrapped in I() so they are used as
    # literal colours and bypass the manual scale (requires ggplot2 >= 3.5.0),
    # and the manual scale is left with only the four base colours.
    ggplot() +
      annotate("rect",
        xmin = xbreaks[cdr3_st2] - 0.5, xmax = xbreaks[cdr3_en2] - 0.5,
        ymin = -Inf, ymax = Inf, fill = "grey", alpha = 0.25
      ) +
      geom_vline(
        data = df_arrows[-1, ], aes(xintercept = x),
        color = "grey", linetype = 2, linewidth = 0.5
      ) +
      geom_line(
        data = trace_df, aes(x = time, y = signal, group = base, colour = base),
        linewidth = 0.5
      ) +
      # Name the values by base so each line keeps the colour of its axis
      # letter even if a base is absent from the data; unnamed values would be
      # matched to the levels by position.
      scale_color_manual(
        values = setNames(basecolors[1:4], c("A", "C", "G", "T"))
      ) +
      scale_x_continuous(
        breaks = xbreaks, labels = xlabels, expand = c(0, 1),
        sec.axis = sec_axis(~., breaks = xbreaks, labels = xbreaks)
      ) +
      scale_y_continuous(limits = c(0, 20)) +
      # I(color) / I(text): use the stored strings as the colours themselves.
      geom_segment(
        data = df_arrows, aes(x = x, xend = xend, y = y, yend = yend, color = I(color)),
        linewidth = 8
      ) +
      geom_point(
        data = df_arrows, aes(x = xend, y = y, color = I(color)),
        shape = 18, size = 8
      ) +
      geom_text(
        data = df_arrows, aes(x = (x + xend) / 2, y = y, label = label, color = I(text)),
        fontface = "bold"
      ) +
      theme_light() +
      theme(
        axis.text.x = ggtext::element_markdown(face = "bold", size = 12, color = xcolors),
        axis.text.x.top = element_text(size = 10, color = "grey", angle = 45, hjust = 0),
        axis.text.y = element_text(size = 12),
        axis.title = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_line(linetype = 2),
        legend.position = "none"
      )
    

    enter image description here