rggplot2likert

Sort based on factor levels two plot in ggplot2


i have a data frame called df that contains likert data and a column var with 5 levels.

I want to sort both the likert and the bar plot based on a specific order.For example i want to sort them from top to bottom the "A,C,D,E,B" and not based on the sum of the proportions :

library(tibble)
library(tidyverse)
library(ggplot2)
library(ggstats)

var_levels <- c(LETTERS[1:5])
n = 500
likert_levels = c(
  "Very \n Dissatisfied",
  "Dissatisfied",
  "Neutral",
  "Satisfied",
  "Very \n Satisfied"
)

df <- tibble(
  var = sample(var_levels, n, replace = TRUE),  
  val1 = sample(likert_levels, n, replace = TRUE),
  val2 = sample(likert_levels, n, replace = TRUE)
)

df
df2 = df%>%
  pivot_longer(!var, names_to = "Categories", values_to = "likert_values")%>%
  select(-Categories)
df2

library(tidyverse)
library(ggstats)
library(patchwork)

# Define the order of 'var' levels
desired_order <- c("A", "C", "D", "E", "B")

# Ensure 'var' is a factor with the specified order
dat <- df |>
  mutate(
    var = factor(var, levels = desired_order),
    across(-var, ~ factor(.x, likert_levels))
  ) |>
  pivot_longer(-var, names_to = "group") |>
  count(var, value, group) |>
  complete(var, value, group, fill = list(n = 0)) |>
  mutate(
    prop = n / sum(n),
    prop_lower = sum(prop[value %in% likert_levels[1:2]]),
    prop_higher = sum(prop[value %in% likert_levels[4:5]]),
    .by = c(var, group)
  ) |>
  arrange(group, prop_lower) |>
  mutate(
    y_sort = paste(var, group, sep = "."),
    y_sort = fct_inorder(y_sort)
  )

top10 <- dat |>
  distinct(group, var, prop_lower) |>
  slice_max(prop_lower, n = 10, by = group)

dat <- dat |>
  semi_join(top10)

dat_tot <- dat |>
  distinct(group, var, y_sort, prop_lower, prop_higher) |>
  pivot_longer(-c(group, var, y_sort),
               names_to = c(".value", "name"),
               names_sep = "_"
  ) |>
  mutate(
    hjust_tot = ifelse(name == "lower", 1, 0),
    x_tot = ifelse(name == "lower", -1, 1)
  )

dat_bar <- dat |> 
  summarise(
    n = sum(n), .by = c(y_sort, group)
  )

p1 <- ggplot(dat, aes(y = y_sort, x = prop, fill = value)) +
  geom_col(position = position_likert(reverse = FALSE)) +
  geom_text(
    aes(
      label = label_percent_abs(hide_below = .05, accuracy = 1)(prop),
      color = after_scale(hex_bw(.data$fill))
    ),
    position = position_likert(vjust = 0.5, reverse = FALSE),
    size = 3.5
  ) +
  geom_label(
    aes(
      x = x_tot,
      label = label_percent_abs(accuracy = 1)(prop),
      hjust = hjust_tot,
      fill = NULL
    ),
    data = dat_tot,
    size = 3.5,
    color = "black",
    fontface = "bold",
    label.size = 0,
    show.legend = FALSE
  ) +
  scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
  scale_x_continuous(
    labels = label_percent_abs(),
    expand = c(0, .15)
  ) +
  scale_fill_brewer(palette = "BrBG") +
  facet_wrap(~group,
             scales = "free_y", ncol = 1,
             strip.position = "right"
  ) +
  theme_light() +
  theme(
    legend.position = "bottom",
    panel.grid.major.y = element_blank(),
    strip.text = element_blank()
  ) +
  labs(x = NULL, y = NULL, fill = NULL)

p2 <- ggplot(dat_bar, aes(y = y_sort, x = n)) +
  geom_col() +
  geom_label(
    aes(
      label = label_number_abs(hide_below = .05, accuracy = 1)(n)
    ),
    size = 3.5,
    hjust = 1,
    fill = NA,
    label.size = 0,
    color = "white"
  ) +
  scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
  scale_x_continuous(
    labels = label_number_abs(),
    expand = c(0, 0, 0, .05)
  ) +
  facet_wrap(~group,
             scales = "free_y", ncol = 1,
             strip.position = "right"
  ) +
  theme_light() +
  theme(
    legend.position = "bottom",
    panel.grid.major.y = element_blank()
  ) +
  labs(x = NULL, y = NULL, fill = NULL)

# Combine the plots
p1 + p2 +
  plot_layout(
    guides = "collect") & 
  theme(legend.position = "bottom")

the problem is that the df is the original data frame and df2 is the appended data frame.Combining those two to plot the bar plot from the original and the likert plot form the appended.Both to be sorted from top to bottom as the order "A,C,D,E,B". How can i do it in R ?

enter image description here


Solution

  • Based on the given code you can achieve your desired result using y_sort = factor(var, levels = rev(desired_order)):

    library(tidyverse)
    library(ggstats)
    library(patchwork)
    
    # Define the order of 'var' levels
    desired_order <- c("A", "C", "D", "E", "B")
    
    # Ensure 'var' is a factor with the specified order
    dat <- df |>
      mutate(
        var = factor(var, levels = desired_order),
        across(-var, ~ factor(.x, likert_levels))
      ) |>
      pivot_longer(-var, names_to = "group") |>
      count(var, value, group) |>
      complete(var, value, group, fill = list(n = 0)) |>
      mutate(
        prop = n / sum(n),
        prop_lower = sum(prop[value %in% likert_levels[1:2]]),
        prop_higher = sum(prop[value %in% likert_levels[4:5]]),
        .by = c(var, group)
      ) |>
      arrange(group, prop_lower) |>
      mutate(
        y_sort = factor(var, levels = rev(desired_order))
      )
    
    top10 <- dat |>
      distinct(group, var, prop_lower) |>
      slice_max(prop_lower, n = 10, by = group)
    
    dat <- dat |>
      semi_join(top10)
    #> Joining with `by = join_by(var, group, prop_lower)`
    
    dat_tot <- dat |>
      distinct(group, var, y_sort, prop_lower, prop_higher) |>
      pivot_longer(-c(group, var, y_sort),
        names_to = c(".value", "name"),
        names_sep = "_"
      ) |>
      mutate(
        hjust_tot = ifelse(name == "lower", 1, 0),
        x_tot = ifelse(name == "lower", -1, 1)
      )
    
    dat_bar <- dat |>
      summarise(
        n = sum(n), .by = c(y_sort, group)
      )
    
    p1 <- ggplot(dat, aes(y = y_sort, x = prop, fill = value)) +
      geom_col(position = position_likert(reverse = FALSE)) +
      geom_text(
        aes(
          label = label_percent_abs(hide_below = .05, accuracy = 1)(prop),
          color = after_scale(hex_bw(.data$fill))
        ),
        position = position_likert(vjust = 0.5, reverse = FALSE),
        size = 3.5
      ) +
      geom_label(
        aes(
          x = x_tot,
          label = label_percent_abs(accuracy = 1)(prop),
          hjust = hjust_tot,
          fill = NULL
        ),
        data = dat_tot,
        size = 3.5,
        color = "black",
        fontface = "bold",
        label.size = 0,
        show.legend = FALSE
      ) +
      scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
      scale_x_continuous(
        labels = label_percent_abs(),
        expand = c(0, .15)
      ) +
      scale_fill_brewer(palette = "BrBG") +
      facet_wrap(~group,
        scales = "free_y", ncol = 1,
        strip.position = "right"
      ) +
      theme_light() +
      theme(
        legend.position = "bottom",
        panel.grid.major.y = element_blank(),
        strip.text = element_blank()
      ) +
      labs(x = NULL, y = NULL, fill = NULL)
    
    p2 <- ggplot(dat_bar, aes(y = y_sort, x = n)) +
      geom_col() +
      geom_label(
        aes(
          label = label_number_abs(hide_below = .05, accuracy = 1)(n)
        ),
        size = 3.5,
        hjust = 1,
        fill = NA,
        label.size = 0,
        color = "white"
      ) +
      scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
      scale_x_continuous(
        labels = label_number_abs(),
        expand = c(0, 0, 0, .05)
      ) +
      facet_wrap(~group,
        scales = "free_y", ncol = 1,
        strip.position = "right"
      ) +
      theme_light() +
      theme(
        legend.position = "bottom",
        panel.grid.major.y = element_blank()
      ) +
      labs(x = NULL, y = NULL, fill = NULL)
    
    # Combine the plots
    p1 + p2 +
      plot_layout(
        guides = "collect"
      ) &
      theme(legend.position = "bottom")