r ggplot2 ggh4x

Using mutate (dplyr) to modify values of a column, while preserving groups order


I use this dataset and code to create a nested boxplot in which the groups are reordered, following the method suggested at https://stackoverflow.com/questions/77617469/problem-with-reordering-a-nested-x-axis-with-ggh4x-package:

# Example data: 40 measurements of Size for two species. Within each
# species the first 10 rows are "Not Exposed" and the next 10 "Exposed".
set1 <- structure(
  list(
    Tx = rep(rep(c("Not Exposed", "Exposed"), each = 10L), times = 2L),
    Species = factor(
      rep(c("Species1", "Species2"), each = 20L),
      levels = c("Species1", "Species2")
    ),
    Size = c(
      # Species1, Not Exposed
      88.5, 83.3, 59.5, 78, 50.3, 57, 78.2, 59, 85, 59.5,
      # Species1, Exposed
      13.1, 50.1, 55, 60.1, 13.8, 27, 57.1, 53.1, 42, 16,
      # Species2, Not Exposed
      88.8, 26.2, 62, 108.5, 92.3, 74.4, 77.3, 96, 88.7, 77.8,
      # Species2, Exposed
      50.7, 61.9, 65.1, 63.5, 64, 88.6, 53.8, 82.1, 78.8, 75.6
    )
  ),
  row.names = c(NA, -40L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Attach the per-group mean (w) and maximum (max) of Size to every row.
set1 <- mutate(
  set1,
  w = mean(Size),
  max = max(Size),
  .by = c(Tx, Species)
)

# Request the display order "Not Exposed" -> "Exposed".
set1[["Tx"]] <- factor(set1[["Tx"]], levels = c("Not Exposed", "Exposed"))

# Append the group size to each label. paste0() returns a character vector,
# so Tx is no longer a factor after this step.
set1 <- mutate(
  set1,
  Tx = paste0(Tx, '\n(n = ', n(), ')'),
  .by = c(Tx, Species)
)

# Boxplot with jittered points on a nested Tx-within-Species x-axis.
ggplot(set1, aes(x = interaction(Tx, Species), y = Size)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, stroke = 0.5, size = 2) +
  theme_classic() +
  guides(x = "axis_nested")

The following code usually allows me to add the number of observations (n = ...) to each axis tick label: set1 <- set1 %>% mutate(Tx = paste0(Tx, '\n(n = ', n(), ')'), .by = c(Tx, Species))

However, when it is used after set1$Tx <- factor(set1$Tx, levels = c("Not Exposed", "Exposed")), the groups are no longer ordered correctly ("Exposed" comes before "Not Exposed" on the nested x-axis).


Solution

  • When you create "Tx (n)" (using set1 <- set1 %>% mutate(Tx = paste0(Tx, '\n(n = ', n(), ')'), .by = c(Tx, Species))), paste0() returns a character vector, so the variable type changes from factor to character and ggplot2 falls back to alphabetical order; if "Tx (n)" is a factor you don't lose your desired order, e.g.

    library(tidyverse)
    library(ggh4x)
    
    # Example data: 40 measurements of Size for two species. Within each
    # species the first 10 rows are "Not Exposed" and the next 10 "Exposed".
    set1 <- structure(
      list(
        Tx = rep(rep(c("Not Exposed", "Exposed"), each = 10L), times = 2L),
        Species = factor(
          rep(c("Species1", "Species2"), each = 20L),
          levels = c("Species1", "Species2")
        ),
        Size = c(
          # Species1, Not Exposed
          88.5, 83.3, 59.5, 78, 50.3, 57, 78.2, 59, 85, 59.5,
          # Species1, Exposed
          13.1, 50.1, 55, 60.1, 13.8, 27, 57.1, 53.1, 42, 16,
          # Species2, Not Exposed
          88.8, 26.2, 62, 108.5, 92.3, 74.4, 77.3, 96, 88.7, 77.8,
          # Species2, Exposed
          50.7, 61.9, 65.1, 63.5, 64, 88.6, 53.8, 82.1, 78.8, 75.6
        )
      ),
      row.names = c(NA, -40L),
      class = c("tbl_df", "tbl", "data.frame")
    )
    
    # Attach the per-group mean (w) and maximum (max) of Size to every row.
    set1 <- mutate(
      set1,
      w = mean(Size),
      max = max(Size),
      .by = c(Tx, Species)
    )

    # Request the display order "Not Exposed" -> "Exposed".
    set1[["Tx"]] <- factor(
      set1[["Tx"]],
      levels = c("Not Exposed", "Exposed"),
      ordered = TRUE
    )

    # Append the group size to each label and wrap the result in factor(),
    # because paste0() on its own returns a character vector.
    # NOTE(review): no levels are given here, so the final level order
    # presumably follows the order in which the groups first occur in the
    # data rather than the levels set above -- confirm before reusing this
    # on data that is sorted differently.
    set1 <- mutate(
      set1,
      Tx = factor(paste0(Tx, '\n(n = ', n(), ')')),
      .by = c(Tx, Species)
    )

    # Boxplot with jittered points on a nested Tx-within-Species x-axis.
    ggplot(set1, aes(x = interaction(Tx, Species), y = Size)) +
      geom_boxplot() +
      geom_jitter(width = 0.1, stroke = 0.5, size = 2) +
      theme_classic() +
      guides(x = "axis_nested")
    

    Created on 2023-12-10 with reprex v2.0.2


    If you want to specify the order, you need to specify the factor levels for the "Tx (n)" variable, e.g.

    library(tidyverse)
    library(ggh4x)
    
    # Example data: 40 measurements of Size for two species. Within each
    # species the first 10 rows are "Not Exposed" and the next 10 "Exposed".
    set1 <- structure(
      list(
        Tx = rep(rep(c("Not Exposed", "Exposed"), each = 10L), times = 2L),
        Species = factor(
          rep(c("Species1", "Species2"), each = 20L),
          levels = c("Species1", "Species2")
        ),
        Size = c(
          # Species1, Not Exposed
          88.5, 83.3, 59.5, 78, 50.3, 57, 78.2, 59, 85, 59.5,
          # Species1, Exposed
          13.1, 50.1, 55, 60.1, 13.8, 27, 57.1, 53.1, 42, 16,
          # Species2, Not Exposed
          88.8, 26.2, 62, 108.5, 92.3, 74.4, 77.3, 96, 88.7, 77.8,
          # Species2, Exposed
          50.7, 61.9, 65.1, 63.5, 64, 88.6, 53.8, 82.1, 78.8, 75.6
        )
      ),
      row.names = c(NA, -40L),
      class = c("tbl_df", "tbl", "data.frame")
    )
    
    # Attach the per-group mean (w) and maximum (max) of Size to every row.
    set1 <- set1 %>%
      mutate(w = mean(Size), max = max(Size), .by = c(Tx, Species))

    # Desired display order of the treatment groups. A plain (unordered)
    # factor is enough: only the order of its levels is used below.
    set1$Tx <- factor(set1$Tx, levels = c("Not Exposed", "Exposed"))

    # Step 1 (grouped): build the "Tx (n = ...)" label for each
    # Tx x Species group, where n() is that group's row count.
    # Step 2 (ungrouped): convert the labels to a factor once, over the whole
    # column, with levels sorted by the original Tx levels. Building the
    # levels inside the grouped step would make them depend on the current
    # group's n(), which only gives the intended result when every group
    # has the same size.
    set1 <- set1 %>%
      mutate(Tx_label = paste0(Tx, "\n(n = ", n(), ")"),
             .by = c(Tx, Species)) %>%
      mutate(Tx = factor(Tx_label, levels = unique(Tx_label[order(Tx)])),
             Tx_label = NULL)

    # Boxplot with jittered points on a nested Tx-within-Species x-axis.
    ggplot(set1, aes(x = interaction(Tx, Species), y = Size)) +
      geom_boxplot() +
      geom_jitter(width = 0.1, stroke = 0.5, size = 2) +
      guides(x = "axis_nested") +
      theme_classic()
    

    Created on 2023-12-10 with reprex v2.0.2