rggplot2facetggproto

Enabling per-facet panel range clipping/cropping in ggplot2 for a factor variable, building on code that can do it for continuous x and y


Sometimes when plotting things in facets the default range doesn't give us the visualization we want, and specifying custom axes ranges would give us a much more interpretable image.

I am trying to modify existing code that enables per-facet panel range clipping for ggplot for continuous x and y axis. The goal is revising the function to handle a factor variable on the x-axis. Ideally, it would then be able to handle either factor or continuous variables. The original code is not mine, but has a history on Stack Overflow, starting with this question https://stackoverflow.com/a/63568587/10405322 and then culminating in this gist on Github: https://gist.github.com/r2evans/6057f7995c117bb787495dc14a228d5d

All that is provided as background information. I will include here the code necessary to replicate the original continuous variable case and then show the y-continuous and x-as-factor case. My own real case is much more complicated with way more data, involving individual boxplots across an x-as-factor variable, but solving this cleaner and simpler base case should solve my issue. I'm not the only person that has had this issue in the past, so rest assured any solution will help others as well. I'll post the code in two parts.

First, here is the function from the gist:

coord_cartesian_panels <- function(..., panel_limits = NULL,
                                   expand = TRUE, default = FALSE, clip = "on") {
  if (is.null(panel_limits)) panel_limits <- tibble::tribble(...)
  ggplot2::ggproto(NULL, UniquePanelCoords,
                   panel_limits = panel_limits,
                   expand = expand, default = default, clip = clip)
}

UniquePanelCoords <- ggplot2::ggproto(
  "UniquePanelCoords", ggplot2::CoordCartesian,
  
  num_of_panels = 1,
  panel_counter = 1,
  layout = NULL,
  
  setup_layout = function(self, layout, params) {
    self$num_of_panels <- length(unique(layout$PANEL))
    self$panel_counter <- 1
    self$layout <- layout # store for later
    layout
  },
  
  setup_panel_params =  function(self, scale_x, scale_y, params = list()) {
    train_cartesian <- function(scale, limits, name, given_range = c(NA, NA)) {
      if (anyNA(given_range)) {
        expansion <- ggplot2:::default_expansion(scale, expand = self$expand)
        range <- ggplot2:::expand_limits_scale(scale, expansion, coord_limits = limits)
        isna <- is.na(given_range)
        given_range[isna] <- range[isna]
      }
      out <- list(
        ggplot2:::view_scale_primary(scale, limits, given_range),
        sec = ggplot2:::view_scale_secondary(scale, limits, given_range),
        arrange = scale$axis_order(),
        range = given_range
      )
      names(out) <- c(name, paste0(name, ".", names(out)[-1]))
      out
    }

    this_layout <- self$layout[ self$panel_counter,, drop = FALSE ]
    self$panel_counter <- 
      if (self$panel_counter < self$num_of_panels) {
        self$panel_counter + 1
      } else 1

    # determine merge column names by removing all "standard" names
    layout_names <- setdiff(names(this_layout),
                            c("PANEL", "ROW", "COL", "SCALE_X", "SCALE_Y"))
    limits_names <- setdiff(names(self$panel_limits),
                            c("xmin", "xmax", "ymin", "ymax"))

    limits_extras <- setdiff(limits_names, layout_names)
    if (length(limits_extras) > 0) {
      stop("facet names in 'panel_limits' not found in 'layout': ",
           paste(sQuote(limits_extras), collapse = ","))
    } else if (length(limits_names) == 0 && NROW(self$panel_limits) == 1) {
      # no panels in 'panel_limits'
      this_panel_limits <- cbind(this_layout, self$panel_limits)
    } else {
      this_panel_limits <- merge(this_layout, self$panel_limits, all.x = TRUE, by = limits_names)
    }

    if (isTRUE(NROW(this_panel_limits) > 1)) {
      stop("multiple matches for current panel in 'panel_limits'")
    }

    # add missing min/max columns, default to "no override" (NA)
    this_panel_limits[, setdiff(c("xmin", "xmax", "ymin", "ymax"),
                                names(this_panel_limits)) ] <- NA

    c(
      train_cartesian(scale_x, self$limits$x, "x",
                      unlist(this_panel_limits[, c("xmin", "xmax"), drop = TRUE])),
      train_cartesian(scale_y, self$limits$y, "y",
                      unlist(this_panel_limits[, c("ymin", "ymax"), drop = TRUE])))
  }
)

Second, below is the test data and test case. At the end of the dataframe generation code I've commented out a pipe and mutate that makes Nsubjects a factor so that this code can be run to see the x and y continuous case, and then quickly modified to make the problematic x factor and y continuous case.

library(dplyr)
library(ggplot2)

test_data <- structure(list(DataType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("A", "B"), class = "factor"), 
    ExpType = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("X", "Y"), class = "factor"), 
    EffectSize = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("15", "35"
    ), class = "factor"), Nsubjects = c(8, 16, 32, 64, 8, 16, 
    32, 64, 8, 16, 32, 64, 8, 16, 32, 64, 8, 16, 32, 64, 8, 16, 
    32, 64, 8, 16, 32, 64, 8, 16, 32, 64), Odds = c(1.06248116259846, 
    1.09482076720863, 1.23086993413208, 1.76749340505612, 1.06641831731573, 
    1.12616954196688, 1.48351814320987, 3.50755080416964, 1.11601399761081, 
    1.18352602009495, 1.45705466646283, 2.53384744810515, 1.13847061762186, 
    1.24983742407086, 1.97075900741022, 6.01497152563726, 1.02798821372378, 
    1.06297006279249, 1.19432835697453, 1.7320754674107, 1.02813271730924, 
    1.09355953747203, 1.44830680332583, 3.4732692664923, 1.06295915758305, 
    1.12008443626365, 1.3887632112682, 2.46321037334, 1.06722652223114, 
    1.1874936754725, 1.89870184372054, 5.943747409114), Upper = c(1.72895843644471, 
    2.09878774769559, 2.59771794965346, 5.08513435549015, 1.72999898901071, 
    1.8702196882561, 3.85385388850167, 5.92564404180303, 1.99113042576373, 
    2.61074135841984, 3.45852331828636, 4.83900142207583, 1.57897154221764, 
    1.8957409107653, 10, 75, 2.3763918424135, 2.50181951057562, 
    3.45037180395673, 3.99515276392065, 2.04584535265976, 2.39317394040066, 
    2.832526733659, 5.38414183471915, 1.40569501856836, 2.6778044191832, 
    2.98023068052396, 4.75934650422069, 1.54116883311054, 2.50647989271592, 
    3.48517589981551, 100), Lower = c(0.396003888752214, 0.0908537867216577, 
    -0.135978081389309, -1.55014754537791, 0.40283764562075, 
    0.382119395677663, -0.88681760208193, 1.08945756653624, 0.240897569457892, 
    -0.243689318229938, -0.544413985360706, 0.228693474134466, 
    0.69796969302609, 0.603933937376415, 0.183548809738402, 3.57236968943798, 
    -0.320415414965949, -0.375879384990643, -1.06171509000767, 
    -0.531001829099242, 0.010420081958713, -0.206054865456611, 
    0.0640868729926525, 1.56239669826544, 0.720223296597732, 
    -0.437635546655903, -0.202704257987574, 0.167074242459314, 
    0.593284211351745, -0.131492541770921, 0.312227787625573, 
    3.76692741957876)), .Names = c("DataType", "ExpType", "EffectSize", 
"Nsubjects", "Odds", "Upper", "Lower"), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -32L)) # %>%
    # mutate(Nsubjects = as.factor(Nsubjects))
    # ^ UNCOMMENT THE LINES ABOVE TO MAKE THE X-AXIS VAR A FACTOR!


p <- test_data %>%
  ggplot(aes(x = Nsubjects, y = Odds, color=EffectSize)) +
  facet_wrap(DataType ~ ExpType, labeller = label_both, scales = "free") +
  geom_line(size = 2) +
  geom_ribbon(aes(ymax = Upper, ymin = Lower, fill = EffectSize, color = NULL), alpha = 0.2)

p + coord_cartesian_panels(
  panel_limits = tibble::tribble(
    ~DataType, ~ExpType, ~ymin, ~ymax
  , "A"      , "X"     ,     1,     4
  , "A"      , "Y"     ,     1,     6
  , "B"      , "Y"     ,     1,     7
  )
)

How could this function be modified to enable custom per-facet panel range clipping/cropping for the x-axis as a factor variable?


Solution

  • TL;DR

    Inserting the following inside train_cartesian worked for me:

    if(scale$is_discrete()) {limits <- scale$get_limits()}
    

    Full code

    Modified version of gist:

    coord_cartesian_panels <- function(..., panel_limits = NULL,
                                       expand = TRUE, default = FALSE, clip = "on") {
      if (is.null(panel_limits)) panel_limits <- tibble::tribble(...)
      ggplot2::ggproto(NULL, UniquePanelCoords,
                       panel_limits = panel_limits,
                       expand = expand, default = default, clip = clip)
    }
    
    UniquePanelCoords <- ggplot2::ggproto(
      "UniquePanelCoords", ggplot2::CoordCartesian,
      
      num_of_panels = 1,
      panel_counter = 1,
      layout = NULL,
      
      setup_layout = function(self, layout, params) {
        self$num_of_panels <- length(unique(layout$PANEL))
        self$panel_counter <- 1
        self$layout <- layout # store for later
        layout
      },
      
      setup_panel_params =  function(self, scale_x, scale_y, params = list()) {
        
        train_cartesian <- function(scale, limits, name, given_range = c(NA, NA)) {
          if (anyNA(given_range)) {
            expansion <- ggplot2:::default_expansion(scale, expand = self$expand)
            range <- ggplot2:::expand_limits_scale(scale, expansion, coord_limits = limits)
            isna <- is.na(given_range)
            given_range[isna] <- range[isna]
          }
          if(scale$is_discrete()) limits <- scale$get_limits() # new line
          out <- list(
            ggplot2:::view_scale_primary(scale, limits, given_range),
            sec = ggplot2:::view_scale_secondary(scale, limits, given_range),
            arrange = scale$axis_order(),
            range = given_range
          )
          names(out) <- c(name, paste0(name, ".", names(out)[-1]))
          out
        }
        
        this_layout <- self$layout[ self$panel_counter,, drop = FALSE ]
        self$panel_counter <- 
          if (self$panel_counter < self$num_of_panels) {
            self$panel_counter + 1
          } else 1
        
        # determine merge column names by removing all "standard" names
        layout_names <- setdiff(names(this_layout),
                                c("PANEL", "ROW", "COL", "SCALE_X", "SCALE_Y"))
        limits_names <- setdiff(names(self$panel_limits),
                                c("xmin", "xmax", "ymin", "ymax"))
        
        limits_extras <- setdiff(limits_names, layout_names)
        if (length(limits_extras) > 0) {
          stop("facet names in 'panel_limits' not found in 'layout': ",
               paste(sQuote(limits_extras), collapse = ","))
        } else if (length(limits_names) == 0 && NROW(self$panel_limits) == 1) {
          # no panels in 'panel_limits'
          this_panel_limits <- cbind(this_layout, self$panel_limits)
        } else {
          this_panel_limits <- merge(this_layout, self$panel_limits, all.x = TRUE,
                                     by = limits_names)
        }
        
        if (isTRUE(NROW(this_panel_limits) > 1)) {
          stop("multiple matches for current panel in 'panel_limits'")
        }
        
        # add missing min/max columns, default to "no override" (NA)
        this_panel_limits[, setdiff(c("xmin", "xmax", "ymin", "ymax"),
                                    names(this_panel_limits)) ] <- NA
        c(train_cartesian(scale_x, self$limits$x, "x",
                          unlist(this_panel_limits[, c("xmin", "xmax"), drop = TRUE])),
          train_cartesian(scale_y, self$limits$y, "y",
                          unlist(this_panel_limits[, c("ymin", "ymax"), drop = TRUE])))
      }
    
    )
    

    Data (collapsed the x-axis values in the original example to bucket more observations under each factor value):

    test_data <- test_data %>%
      mutate(Nsubjects2 = factor(ifelse(Nsubjects <= 16, "a", "b")))
    

    Code for plot:

    
    # base plot
    p <- ggplot(test_data,
                aes(y = Odds, color=EffectSize)) +
      facet_wrap(DataType ~ ExpType, labeller = label_both, scales = "free") 
    
    # plot with numeric variable mapped to x-axis
    p1 <- p + aes(x = Nsubjects) +
      geom_line(linewidth = 2) +
      geom_ribbon(aes(ymax = Upper, ymin = Lower, fill = EffectSize, color = NULL),
                  alpha = 0.2)
    
    # plot with factor variable mapped to x-axis
    # (demonstrating with boxplot because I don't think it makes a lot of sense
    # to have a line plot for factor axis)
    p2 <- p + aes(x = Nsubjects2) +
      geom_boxplot()
    
    # coord layer
    cc <- coord_cartesian_panels(
      panel_limits = tibble::tribble(
        ~DataType, ~ExpType, ~ymin, ~ymax
        , "A"      , "X"     ,     1,     4
        , "A"      , "Y"     ,     1,     6
        , "B"      , "Y"     ,     1,     7
      )
    )
    
    # print plots to show that the same coord layer works for both
    p1 + ggtitle("Before (numeric x)")
    p1 + cc + ggtitle("After (numeric x)")
    p2 + ggtitle("Before (factor x)")
    p2 + cc + ggtitle("After (factor x)")
    

    Result: screenshot of 4 plots strung together using patchwork