Tags: r, ggplot2, visualization, geom

Sorting calendar plot with geom_segment() + fix scale


I am trying to build this calendar plot with ggplot2.

So far, it works fine, but I want to make two adjustments, and I am not finding the way:

  1. I want to arrange every segment within each year and num_months, so the first geom_segment that appears in each num_months/box is the first start_month (lower number), then the second start_month (second lower number) and so on...
  2. I want the x scale for each year to start in January and end in December, even for months that have no observations.
# Build the example data: parse the inline CSV, derive the first day of the
# start month and the last day of the end month for every interval, and assign
# each interval a vertical position (`yy`) within its year / num_months panel.
data <- read_csv("num_months,start_month_year,end_month_year,B1,B1_p,year,start_month,end_month
1,6,6,3.3571016788482666,0.007681768853217363,2021,5,5
1,8,8,2.548985481262207,0.007373321335762739,2021,7,7
1,10,10,2.139772415161133,0.03452971577644348,2021,9,9
1,12,12,2.165775775909424,0.07796278595924377,2021,11,11
1,13,13,1.9506219625473022,0.09215697646141052,2021,12,12
1,23,23,2.7839596271514893,0.011407249607145786,2022,10,10
1,25,25,2.220555543899536,0.06181173026561737,2022,12,12
6,6,11,0.9881601333618164,0.08719704300165176,2021,5,10
6,8,13,1.438501238822937,0.032221969217061996,2021,7,12
6,9,14,1.16400945186615,0.09187468141317368,2021,8,1
6,10,15,1.5834165811538696,0.03494146466255188,2021,9,2
6,11,16,1.294316291809082,0.09792502969503403,2021,10,3
6,12,17,1.4204859733581543,0.0546354204416275,2021,11,4
6,20,25,1.07038414478302,0.0722803920507431,2022,7,12") %>%
  mutate(
    # Build the dates from the `year` column itself so that every year in the
    # data is handled, not only 2021 and 2022.
    start_dt = ymd(paste(year, start_month, "01", sep = "-")),
    # First day of the end month, rounded up to the next month boundary,
    # minus one day.
    # NOTE: the same `year` is used for both ends, so rows whose end_month is
    # smaller than start_month (e.g. 8 -> 1) get an end_dt before start_dt.
    end_dt = ceiling_date(
      ymd(paste(year, end_month, "01", sep = "-")),
      unit = "month"
    ) - 1,
    # These adjustments run after end_dt has been computed, so they only
    # change the end_month column, not the dates above.
    end_month = ifelse(start_month == end_month, end_month + 1, end_month),
    end_month = ifelse(end_month > 12, 1, end_month)  # Wrap around to January if end_month exceeds 12
  ) %>%
  arrange(year, num_months, start_month) %>%  # Sort data within each group by year, num_months, and start_month
  group_by(year, num_months) %>%
  mutate(
    yy = row_number()  # Assign new y position based on sorted order
  ) %>%
  ungroup()

# Create the ggplot

# Calendar plot: one horizontal segment per interval, running from start_dt to
# end_dt at height yy, faceted by num_months (rows) and year (columns).
ggplot(data) +
  geom_segment(
    aes(
      x = start_dt, xend = end_dt,
      y = yy, yend = yy,
      color = factor(num_months)
    ),
    # `size` is deprecated for line widths since ggplot2 3.4.0.
    linewidth = 1
  ) +
  scale_x_date(
    date_labels = "%b",
    date_breaks = "1 month",
    expand = expansion(0, 0)
  ) +
  # yy is only a stacking position, so its axis labels are suppressed.
  scale_y_continuous(labels = NULL) +
  facet_grid(rows = vars(num_months), cols = vars(year), space = "free_y", scales = "free") +
  labs(x = "Month", y = "Number of Months", color = "Number of Months") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"), # Increase spacing between panels
    panel.grid.major.y = element_blank(),
    axis.ticks.y = element_blank()
  )

Solution

  • To fix the order, make sure that your start date column actually contains the start date of each event. To this end I added two new columns, then arranged by the start date before setting yy. To fix the second issue, I use ggh4x::facetted_pos_scales to set the limits of the x scale so that each facet column covers the whole year:

    library(ggplot2)
    library(dplyr, warn = FALSE)
    library(ggh4x)
    
    # Put the earlier of the two dates in start_dt1 and the later in end_dt1,
    # then number the intervals within each year / num_months panel in order
    # of their (corrected) start date.
    data <- data |>
      mutate(
        start_dt1 = pmin(start_dt, end_dt),
        end_dt1 = pmax(start_dt, end_dt)
      ) |>
      arrange(start_dt1) |>
      group_by(year, num_months) |>
      mutate(yy = seq_len(n())) |>
      ungroup()
    
    # Calendar plot using the corrected start/end dates. Each facet column
    # (year) gets its own x scale whose limits span January 1 to December 31
    # of that year.
    ggplot(data) +
      geom_segment(
        aes(
          x = start_dt1, xend = end_dt1,
          y = yy, yend = yy,
          color = factor(num_months)
        ),
        # `size` is deprecated for line widths since ggplot2 3.4.0.
        linewidth = 1
      ) +
      # yy is only a stacking position, so its axis labels are suppressed.
      scale_y_continuous(labels = NULL) +
      facet_grid(
        rows = vars(num_months), cols = vars(year),
        space = "free_y", scales = "free"
      ) +
      labs(x = "Month", y = "Number of Months", color = "Number of Months") +
      theme_bw() +
      theme(
        panel.spacing = unit(1, "lines"), # Increase spacing between panels
        panel.grid.major.y = element_blank(),
        axis.ticks.y = element_blank()
      ) +
      ggh4x::facetted_pos_scales(
        x = lapply(
          # The scales are matched to facet columns by position, so sort the
          # years to follow the facet order instead of the row order of `data`.
          sort(unique(data$year)),
          \(x) scale_x_date(
            date_labels = "%b",
            date_breaks = "1 month",
            expand = expansion(0, 0),
            limits = as.Date(paste0(x, c("-01-01", "-12-31")))
          )
        )
      )