r ggplot2 facet-grid

Issue in right-aligning y-axis labels in faceted plot using ggplot2


I'm trying to prepare a faceted plot with y-axis labels colored based on certain conditions. I have managed to get the y-axis labels mapped to their respective colors but the alignment of the labels seems to be a problem. I want the labels to be right-aligned to the y-axis. I tried implementing the solutions suggested here and here but none of them seem to work.

Here's the data:

> dput(df)
structure(list(Term = structure(c(103L, 98L, 97L, 94L, 68L, 65L, 
62L, 59L, 56L, 55L, 99L, 96L, 95L, 93L, 72L, 70L, 66L, 86L, 79L, 
64L, 63L, 58L, 57L, 54L), levels = c("GO:1901642~nucleoside transmembrane transport", 
"GO:0046033~AMP metabolic process", "GO:1990544~mitochondrial ATP transmembrane transport", 
"GO:0140021~mitochondrial ADP transmembrane transport", "GO:0005471~ATP:ADP antiporter activity", 
"GO:0003876~AMP deaminase activity", "GO:0047555~3',5'-cyclic-GMP phosphodiesterase activity", 
"GO:0005324~long-chain fatty acid transmembrane transporter activity", 
"GO:0005337~nucleoside transmembrane transporter activity", "GO:0042761~very long-chain fatty acid biosynthetic process", 
"GO:0019367~fatty acid elongation, saturated fatty acid", "GO:0034625~fatty acid elongation, monounsaturated fatty acid", 
"GO:0034626~fatty acid elongation, polyunsaturated fatty acid", 
"GO:0032264~IMP salvage", "GO:0006006~glucose metabolic process", 
"GO:0090263~positive regulation of canonical Wnt signaling pathway", 
"GO:0005319~lipid transporter activity", "GO:0009922~fatty acid elongase activity", 
"GO:0010608~post-transcriptional regulation of gene expression", 
"GO:0007288~sperm axoneme assembly", "GO:0030148~sphingolipid biosynthetic process", 
"GO:0005952~cAMP-dependent protein kinase complex", "GO:0097729~9+2 motile cilium", 
"GO:0034620~cellular response to unfolded protein", "GO:0004198~calcium-dependent cysteine-type endopeptidase activity", 
"GO:0008569~minus-end-directed microtubule motor activity", "GO:0018107~peptidyl-threonine phosphorylation", 
"GO:0046835~carbohydrate phosphorylation", "GO:0003730~mRNA 3'-UTR binding", 
"GO:0006096~glycolytic process", "GO:0005871~kinesin complex", 
"GO:0006869~lipid transport", "GO:0051287~NAD binding", "GO:0031514~motile cilium", 
"GO:0140359~ABC-type transporter activity", "GO:0018105~peptidyl-serine phosphorylation", 
"GO:0031072~heat shock protein binding", "GO:0006099~tricarboxylic acid cycle", 
"GO:0005516~calmodulin binding", "GO:0003333~amino acid transmembrane transport", 
"GO:0030286~dynein complex", "GO:0051959~dynein light intermediate chain binding", 
"GO:0007165~signal transduction", "GO:0015171~amino acid transmembrane transporter activity", 
"GO:0004672~protein kinase activity", "GO:0005975~carbohydrate metabolic process", 
"GO:0051787~misfolded protein binding", "GO:0007018~microtubule-based movement", 
"GO:0004722~protein serine/threonine phosphatase activity", "GO:0004674~protein serine/threonine kinase activity", 
"GO:0140662~ATP-dependent protein folding chaperone", "GO:0051085~chaperone cofactor-dependent protein refolding", 
"GO:0006468~protein phosphorylation", "GO:0016301~kinase activity", 
"GO:0042026~protein refolding", "GO:0035556~intracellular signal transduction", 
"GO:0003777~microtubule motor activity", "GO:0017018~myosin phosphatase activity", 
"GO:0016310~phosphorylation", "GO:0005874~microtubule", "GO:0009507~chloroplast", 
"GO:0016311~dephosphorylation", "GO:0008017~microtubule binding", 
"GO:0000287~magnesium ion binding", "GO:0055085~transmembrane transport", 
"GO:0005929~cilium", "GO:0003729~mRNA binding", "GO:0006508~proteolysis", 
"GO:0005524~ATP binding", "GO:0005886~plasma membrane", "GO:0016887~ATP hydrolysis activity", 
"GO:0005829~cytosol", "GO:0005737~cytoplasm", "GO:0016020~membrane", 
"GO:0005739~mitochondrion", "GO:0015031~protein transport", "GO:0000398~mRNA splicing, via spliceosome", 
"GO:0045505~dynein intermediate chain binding", "GO:0003755~peptidyl-prolyl cis-trans isomerase activity", 
"GO:0030134~COPII-coated ER to Golgi transport vesicle", "GO:0005868~cytoplasmic dynein complex", 
"GO:0016226~iron-sulfur cluster assembly", "GO:0071013~catalytic step 2 spliceosome", 
"GO:0007017~microtubule-based process", "GO:0046540~U4/U6 x U5 tri-snRNP complex", 
"GO:0051537~2 iron, 2 sulfur cluster binding", "GO:0005665~RNA polymerase II, core complex", 
"GO:0005686~U2 snRNP", "GO:0005682~U5 snRNP", "GO:0005689~U12-type spliceosomal complex", 
"GO:0004792~thiosulfate sulfurtransferase activity", "GO:0071011~precatalytic spliceosome", 
"GO:0016272~prefoldin complex", "GO:0000387~spliceosomal snRNP assembly", 
"GO:0005687~U4 snRNP", "GO:0005685~U1 snRNP", "GO:0140647~P450-containing electron transport chain", 
"GO:0006367~transcription initiation at RNA polymerase II promoter", 
"GO:0000811~GINS complex", "GO:0034719~SMN-Sm protein complex", 
"GO:0097526~spliceosomal tri-snRNP complex", "GO:0034715~pICln-Sm protein complex", 
"GO:0045842~positive regulation of mitotic metaphase/anaphase transition"
), class = c("ordered", "factor")), Category = c("Biological process", 
"Biological process", "Biological process", "Biological process", 
"Biological process", "Biological process", "Biological process", 
"Biological process", "Biological process", "Biological process", 
"Cellular component", "Cellular component", "Cellular component", 
"Cellular component", "Cellular component", "Cellular component", 
"Cellular component", "Molecular function", "Molecular function", 
"Molecular function", "Molecular function", "Molecular function", 
"Molecular function", "Molecular function"), PValue = c(0.0173149736221496, 
0.0022211250550039, 0.0378179166640749, 0.00127796539857151, 
0.00798138530033138, 0.00366564486671723, 0.0473204717333284, 
2.22454619848845e-16, 0.000422682592906967, 0.0192436873377812, 
0.0232976965541856, 0.000899326018158991, 0.000899326018158991, 
0.0138812532009331, 0.00368570740043085, 0.0133201149734851, 
0.000113258109494652, 0.0216207725534343, 0.0192092227496835, 
0.0359035412306864, 0.000661709893356406, 0.0216873994434862, 
0.00146360693566605, 0.00145820771302719), Fold.Enrichment = c(5.8453947368421, 
5.01033834586466, 4.67631578947368, 4.54641812865497, -1.5614892923452, 
-1.63674178836183, -1.81334240401378, -1.92576126700771, -1.96445427101492, 
-1.96445427101492, 5.27043269230769, 4.61162860576923, 4.61162860576923, 
4.39202724358974, -1.24768223226522, -1.37019016159272, -1.61838520486908, 
2.93721286370597, 2.33112132040157, -1.76124885215794, -1.78745791245791, 
-1.92748917748918, -1.94326241134752, -1.98140495867769), Regulation = c("Upregulated", 
"Upregulated", "Upregulated", "Upregulated", "Downregulated", 
"Downregulated", "Downregulated", "Downregulated", "Downregulated", 
"Downregulated", "Upregulated", "Upregulated", "Upregulated", 
"Upregulated", "Downregulated", "Downregulated", "Downregulated", 
"Upregulated", "Upregulated", "Downregulated", "Downregulated", 
"Downregulated", "Downregulated", "Downregulated"), Label = c("orange", 
"red", "red", "orange", "blue", "blue", "cyan4", "blue", "cyan4", 
"cyan4", "orange", "orange", "orange", "orange", "blue", "cyan4", 
"blue", "orange", "orange", "blue", "cyan4", "cyan4", "cyan4", 
"blue")), row.names = c(1L, 6L, 7L, 10L, 36L, 39L, 42L, 45L, 
48L, 49L, 5L, 8L, 9L, 11L, 32L, 34L, 38L, 18L, 25L, 40L, 41L, 
46L, 47L, 50L), class = "data.frame")

and the code:

library(ggh4x) 
library(ggplot2)


# Strip styling: two fill colours for the horizontal strip backgrounds.
strip <- strip_themed(
  background_x = elem_list_rect(fill = c("#0C6291", "#A63446"))
)

# Bars of fold enrichment per term, filled by p-value. The regular y-axis text
# is hidden and re-drawn with geom_text() so every label can take its own
# colour from the `Label` column.
ggplot(df, aes(x = Fold.Enrichment, y = Term)) +
  geom_bar(
    aes(fill = PValue),
    stat = "identity",
    width = 0.4,
    color = "black"
  ) +
  # Hand-placed axis labels: right-justified at a fixed data position.
  geom_text(
    aes(y = Term, label = Term, color = Label),
    data = df,
    x = -2.2,
    hjust = 1,
    vjust = 0.5,
    size = 5 / .pt,
    show.legend = FALSE
  ) +
  scale_fill_gradient2(
    name = "p-value\n",
    low = "red", mid = "yellow", high = "blue",
    midpoint = 0.025,
    limits = c(0, 0.05)
  ) +
  # Each value of `Label` is itself the colour to draw with.
  scale_color_manual(
    values = c(red = "red", blue = "blue", orange = "orange", cyan4 = "cyan4")
  ) +
  facet_grid2(
    df$Category ~ df$Regulation,
    scales = "free",
    strip = strip
  ) +
  labs(x = "Fold enrichment", y = "") +
  theme_bw() +
  theme(axis.text.y = element_blank()) +
  # Clipping is switched off so the text may extend beyond the panel area.
  coord_cartesian(clip = "off")

Solution

  • IMHO you can achieve your desired result quite easily by employing ggtext like so:

    library(ggh4x)
    library(ggplot2)
    
    # Strip styling: two fill colours for the horizontal strip backgrounds.
    strip <- strip_themed(
      background_x = elem_list_rect(fill = c("#0C6291", "#A63446"))
    )

    # Build markdown axis labels: each term is wrapped in a <span> carrying its
    # colour so that ggtext::element_markdown() can render coloured axis text.
    df <- df |>
      dplyr::mutate(
        y_md = glue::glue("<span style='color: {Label}'>{Term}</span>"),
        # Order the label levels by the levels of `Term` itself instead of by
        # the current row order of `df`, so the axis agrees with a `y = Term`
        # plot even when the rows are not pre-sorted.
        y_md = forcats::fct_reorder(as.character(y_md), as.integer(Term))
      )
    
    # Base plot: bars of fold enrichment per term, filled by p-value and
    # facetted by Category (rows) and Regulation (columns).
    p <- ggplot(df, aes(x = Fold.Enrichment, y = Term)) +
      geom_bar(
        aes(fill = PValue),
        stat = "identity",
        width = 0.4,
        color = "black"
      ) +
      scale_fill_gradient2(
        name = "p-value\n",
        low = "red", mid = "yellow", high = "blue",
        midpoint = 0.025,
        limits = c(0, 0.05)
      ) +
      facet_grid2(Category ~ Regulation, scales = "free", strip = strip) +
      labs(x = "Fold enrichment", y = NULL) +
      theme_bw()

    # Swap the y aesthetic for the markdown labels and let ggtext render them
    # as the left axis text.
    p +
      aes(y = y_md) +
      labs(y = NULL) +
      theme(axis.text.y.left = ggtext::element_markdown(size = 5))
    

    If you would rather fake the axis text using geom_text, you have to make sure the labels are added only in the first column of facets (otherwise they are drawn again in every panel):

    # One label row per term, assigned to the "Downregulated" facet column so
    # that the fake axis text is drawn only once per facet row.
    df_axis_text <- dplyr::mutate(
      dplyr::distinct(df, Term, Category, Label),
      Regulation = "Downregulated"
    )

    p +
      # Clipping is switched off so the text may extend beyond the panel area.
      coord_cartesian(clip = "off") +
      geom_text(
        # I(Label): use the colour names stored in the column as they are.
        aes(y = Term, label = Term, color = I(Label)),
        data = df_axis_text,
        # I(-0.05): a position relative to the panel rather than a data value
        # (relies on ggplot2's support for I() in positions; needs >= 3.5.0).
        x = I(-0.05),
        hjust = 1,
        vjust = 0.5,
        size = 5 / .pt,
        show.legend = FALSE
      ) +
      theme(
        # Wide left margin to make room for the hand-drawn labels.
        plot.margin = margin(5.5, 5.5, 5.5, 180, unit = "pt"),
        axis.text.y.left = element_blank()
      )