rggplot2graphvisualizationscatterpie

Pie charts from the scatterpie package appear as lines in a ggplot2 plot


Below is a fully reproducible example of my code, using simulated data:

library(dplyr)
library(ggplot2)
library(scatterpie)  
library(colorspace) 

set.seed(123)  # fixed seed so the simulated data are reproducible
years <- c(1998, 2004, 2010, 2014, 2017, 2020)
origins <- c("Native", "Europe", "North Africa", "Sub-Saharan Africa", "Other")

# One row for every (year, origin_group) combination.
composition_by_origin <- expand.grid(
  year = years,
  origin_group = origins
)

# Simulate total mean wealth and three of its components. Every
# `rnorm(n(), ...)` call consumes random numbers, so the order of the
# expressions below determines the simulated values for a given seed.
composition_by_origin <- composition_by_origin %>%
  mutate(
    # Total mean wealth per group and year: a group-specific base level, plus a
    # linear trend in the number of years since 1998, plus Gaussian noise
    mean_wealth = case_when(
      origin_group == "Native" ~ 200000 + (year - 1998) * 8000 + rnorm(n(), 0, 10000),
      origin_group == "Europe" ~ 150000 + (year - 1998) * 7000 + rnorm(n(), 0, 9000),
      origin_group == "North Africa" ~ 80000 + (year - 1998) * 4000 + rnorm(n(), 0, 5000),
      origin_group == "Sub-Saharan Africa" ~ 60000 + (year - 1998) * 3000 + rnorm(n(), 0, 4000),
      origin_group == "Other" ~ 100000 + (year - 1998) * 5000 + rnorm(n(), 0, 7000)
    ),
    
    # Real-estate wealth: a group-specific share of total wealth plus noise
    mean_real_estate = case_when(
      origin_group == "Native" ~ mean_wealth * (0.55 + rnorm(n(), 0, 0.05)),
      origin_group == "Europe" ~ mean_wealth * (0.50 + rnorm(n(), 0, 0.05)),
      origin_group == "North Africa" ~ mean_wealth * (0.65 + rnorm(n(), 0, 0.05)),
      origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.70 + rnorm(n(), 0, 0.05)),
      origin_group == "Other" ~ mean_wealth * (0.60 + rnorm(n(), 0, 0.05))
    ),
    
    # Financial wealth: a group-specific share of total wealth plus noise
    mean_financial = case_when(
      origin_group == "Native" ~ mean_wealth * (0.25 + rnorm(n(), 0, 0.03)),
      origin_group == "Europe" ~ mean_wealth * (0.30 + rnorm(n(), 0, 0.03)),
      origin_group == "North Africa" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.03)),
      origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.03)),
      origin_group == "Other" ~ mean_wealth * (0.20 + rnorm(n(), 0, 0.03))
    ),
    
    # Professional wealth: a group-specific share of total wealth plus noise
    mean_professional = case_when(
      origin_group == "Native" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "Europe" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "North Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Other" ~ mean_wealth * (0.12 + rnorm(n(), 0, 0.02))
    )
  )

# "Other" wealth is the residual: total minus the three simulated components.
composition_by_origin <- composition_by_origin %>%
  mutate(
    mean_other = mean_wealth - (mean_real_estate + mean_financial + mean_professional),
    # The noisy shares can add up to more than 1, so clamp negative residuals
    # to zero
    mean_other = ifelse(mean_other < 0, 0, mean_other)
  )

# Reduce the composition table to the columns the plot uses and drop rows that
# cannot be plotted.
#
# composition_data: data frame with the columns year, origin_group,
#   mean_wealth, mean_real_estate, mean_financial, mean_professional and
#   mean_other.
# Returns: the same rows restricted to those seven columns, keeping only rows
#   whose mean_wealth is non-missing and strictly positive.
prepare_scatterpie_data <- function(composition_data) {
  # Identifier columns first, then the total and its four components
  wealth_columns <- dplyr::select(
    composition_data,
    year,
    origin_group,
    mean_wealth,
    mean_real_estate,
    mean_financial,
    mean_professional,
    mean_other
  )

  # Comma-separated conditions in filter() are combined with a logical AND
  dplyr::filter(wealth_columns, !is.na(mean_wealth), mean_wealth > 0)
}

# Build the shade palette: for every origin group, four variants of its base
# color, one per wealth component.
#
# Returns: a named list with one entry per "<origin>_<component>" key, where
#   component is real_estate, professional, financial or other (in that order
#   within each origin).
create_color_palette <- function() {
  origin_hues <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )
  component_suffixes <- c("_real_estate", "_professional", "_financial", "_other")

  shades_by_origin <- lapply(names(origin_hues), function(origin) {
    # Single-bracket indexing, so the base color keeps its origin name
    hue <- origin_hues[origin]
    shades <- list(
      colorspace::darken(hue, 0.3),   # real estate: darker variant
      hue,                            # professional: base color
      colorspace::lighten(hue, 0.3),  # financial: lighter variant
      colorspace::lighten(hue, 0.6)   # other: lightest variant
    )
    names(shades) <- paste0(origin, component_suffixes)
    shades
  })

  # Concatenate the per-origin lists into one flat named list
  do.call(c, shades_by_origin)
}

# Draw mean wealth over time as one line per origin group, with a scatterpie
# at every (year, group) point showing how that wealth splits into real
# estate, financial, professional and other assets.
#
# composition_data: data frame accepted by prepare_scatterpie_data().
# Returns: a ggplot object.
plot_wealth_composition_scatterpie <- function(composition_data) {
  pie_data <- prepare_scatterpie_data(composition_data)

  # Shade palette; it only feeds `shades_by_origin` below, and neither object
  # is passed to a layer or a scale.
  shade_palette <- create_color_palette()

  # Pie radius: square root of wealth relative to the largest value, times 10
  largest_wealth <- max(pie_data$mean_wealth, na.rm = TRUE)
  pie_data$pie_size <- sqrt(pie_data$mean_wealth / largest_wealth) * 10

  # Row by row: share of each component in total wealth, then rescale the four
  # shares so that they sum to exactly 1
  pie_data <- pie_data %>%
    rowwise() %>%
    mutate(
      r_real_estate = mean_real_estate / mean_wealth,
      r_financial = mean_financial / mean_wealth,
      r_professional = mean_professional / mean_wealth,
      r_other = mean_other / mean_wealth,
      total_ratio = sum(r_real_estate, r_financial, r_professional, r_other),
      r_real_estate = r_real_estate / total_ratio,
      r_financial = r_financial / total_ratio,
      r_professional = r_professional / total_ratio,
      r_other = r_other / total_ratio
    ) %>%
    ungroup()

  # Four shades per origin, in pie-slice order
  shades_by_origin <- list()
  for (origin in unique(pie_data$origin_group)) {
    shades_by_origin[[origin]] <- c(
      shade_palette[[paste0(origin, "_real_estate")]],
      shade_palette[[paste0(origin, "_financial")]],
      shade_palette[[paste0(origin, "_professional")]],
      shade_palette[[paste0(origin, "_other")]]
    )
  }

  origin_line_colors <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )

  # Layer 1: one wealth trajectory per origin group
  wealth_lines <- geom_line(
    mapping = aes(
      x = year,
      y = mean_wealth,
      group = origin_group,
      color = origin_group
    ),
    data = pie_data,
    size = 1.2
  )

  # Layer 2: a pie at every observation, sliced by the four normalized shares
  wealth_pies <- geom_scatterpie(
    mapping = aes(x = year, y = mean_wealth, group = origin_group, r = pie_size),
    data = pie_data,
    cols = c("r_real_estate", "r_financial", "r_professional", "r_other"),
    alpha = 0.8
  )

  # y axis starts at 0 and leaves 20% headroom above the largest value
  wealth_axis <- scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale()),
    limits = c(0, max(pie_data$mean_wealth) * 1.2),
    expand = expansion(mult = c(0, 0.2))
  )

  plot_theme <- theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 11)
  )

  # The guide sets its own legend title and enlarges the legend keys
  origin_legend <- guide_legend(
    title = "Origine",
    override.aes = list(size = 3)
  )

  ggplot() +
    wealth_lines +
    wealth_pies +
    scale_color_manual(values = origin_line_colors) +
    wealth_axis +
    scale_x_continuous(breaks = unique(pie_data$year)) +
    labs(
      x = "Year",
      y = "Average Gross Wealth",
      color = "Origin"
    ) +
    theme_minimal() +
    plot_theme +
    guides(color = origin_legend)
}

# Build the plot from the simulated data, then render it.
scatterpie_wealth_plot <- plot_wealth_composition_scatterpie(composition_by_origin)
print(scatterpie_wealth_plot)

If you run this R code from scratch, you'll see lines where the pie charts should be. My goal is to show, at each point, the composition of average wealth (financial, professional and real estate wealth) for each immigrant group. However, for a reason I can't identify, the pie charts are drawn as lines. I know it has to do either with the radius or with the scale of my y axis, but every time I try to change them the pie charts become gigantic or get stretched horizontally or vertically.

All I want is a small pie chart at each point. Is this possible?


Solution

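  • As you already guessed, the main issue is that the scales of the variables mapped to x and y differ considerably: the years span about 20 units, whereas wealth spans hundreds of thousands. The pie radius is measured in those same data units, so a radius of 10 is wide along the x axis but practically invisible along the y axis, which flattens every pie into a horizontal line. One option to fix that is a custom transformation for the y scale (below, wealth is divided by 30000) so that both axes cover comparable ranges. Additionally, I reduced the pie_size. Finally, note that geom_scatterpie needs coord_fixed() to draw true circles: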

    library(dplyr)
    #> 
    #> Attaching package: 'dplyr'
    #> The following objects are masked from 'package:stats':
    #> 
    #>     filter, lag
    #> The following objects are masked from 'package:base':
    #> 
    #>     intersect, setdiff, setequal, union
    library(ggplot2)
    library(scatterpie)
    #> scatterpie v0.2.4 Learn more at https://yulab-smu.top/
    library(colorspace)
    
    set.seed(123) # fixed seed so the simulated data are reproducible
    years <- c(1998, 2004, 2010, 2014, 2017, 2020)
    origins <- c("Native", "Europe", "North Africa", "Sub-Saharan Africa", "Other")
    
    # One row for every (year, origin_group) combination.
    composition_by_origin <- expand.grid(
      year = years,
      origin_group = origins
    )
    
    # Simulate total mean wealth and three of its components. Every
    # `rnorm(n(), ...)` call consumes random numbers, so the order of the
    # expressions below determines the simulated values for a given seed.
    composition_by_origin <- composition_by_origin %>%
      mutate(
        # Total mean wealth per group and year: a group-specific base level,
        # plus a linear trend in the number of years since 1998, plus
        # Gaussian noise
        mean_wealth = case_when(
          origin_group == "Native" ~ 200000 + (year - 1998) * 8000 + rnorm(n(), 0, 10000),
          origin_group == "Europe" ~ 150000 + (year - 1998) * 7000 + rnorm(n(), 0, 9000),
          origin_group == "North Africa" ~ 80000 + (year - 1998) * 4000 + rnorm(n(), 0, 5000),
          origin_group == "Sub-Saharan Africa" ~ 60000 + (year - 1998) * 3000 + rnorm(n(), 0, 4000),
          origin_group == "Other" ~ 100000 + (year - 1998) * 5000 + rnorm(n(), 0, 7000)
        ),
        # Real-estate wealth: a group-specific share of total wealth plus noise
        mean_real_estate = case_when(
          origin_group == "Native" ~ mean_wealth * (0.55 + rnorm(n(), 0, 0.05)),
          origin_group == "Europe" ~ mean_wealth * (0.50 + rnorm(n(), 0, 0.05)),
          origin_group == "North Africa" ~ mean_wealth * (0.65 + rnorm(n(), 0, 0.05)),
          origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.70 + rnorm(n(), 0, 0.05)),
          origin_group == "Other" ~ mean_wealth * (0.60 + rnorm(n(), 0, 0.05))
        ),
        # Financial wealth: a group-specific share of total wealth plus noise
        mean_financial = case_when(
          origin_group == "Native" ~ mean_wealth * (0.25 + rnorm(n(), 0, 0.03)),
          origin_group == "Europe" ~ mean_wealth * (0.30 + rnorm(n(), 0, 0.03)),
          origin_group == "North Africa" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.03)),
          origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.03)),
          origin_group == "Other" ~ mean_wealth * (0.20 + rnorm(n(), 0, 0.03))
        ),
        # Professional wealth: a group-specific share of total wealth plus noise
        mean_professional = case_when(
          origin_group == "Native" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
          origin_group == "Europe" ~ mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
          origin_group == "North Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
          origin_group == "Sub-Saharan Africa" ~ mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
          origin_group == "Other" ~ mean_wealth * (0.12 + rnorm(n(), 0, 0.02))
        )
      )
    
    # "Other" wealth is the residual: total minus the three simulated components.
    composition_by_origin <- composition_by_origin %>%
      mutate(
        mean_other = mean_wealth - (mean_real_estate + mean_financial + mean_professional),
        # The noisy shares can add up to more than 1, so clamp negative
        # residuals to zero
        mean_other = ifelse(mean_other < 0, 0, mean_other)
      )
    
    # Reduce the composition table to the columns the plot uses and drop rows
    # that cannot be plotted.
    #
    # composition_data: data frame with the columns year, origin_group,
    #   mean_wealth, mean_real_estate, mean_financial, mean_professional and
    #   mean_other.
    # Returns: the same rows restricted to those seven columns, keeping only
    #   rows whose mean_wealth is non-missing and strictly positive.
    prepare_scatterpie_data <- function(composition_data) {
      # Identifier columns first, then the total and its four components
      wealth_columns <- dplyr::select(
        composition_data,
        year,
        origin_group,
        mean_wealth,
        mean_real_estate,
        mean_financial,
        mean_professional,
        mean_other
      )

      # Comma-separated conditions in filter() are combined with a logical AND
      dplyr::filter(wealth_columns, !is.na(mean_wealth), mean_wealth > 0)
    }
    
    # Build the shade palette: for every origin group, four variants of its
    # base color, one per wealth component.
    #
    # Returns: a named list with one entry per "<origin>_<component>" key,
    #   where component is real_estate, professional, financial or other (in
    #   that order within each origin).
    create_color_palette <- function() {
      origin_hues <- c(
        "Native" = "#1f77b4",
        "Europe" = "#4E79A7",
        "North Africa" = "#F28E2B",
        "Sub-Saharan Africa" = "#E15759",
        "Other" = "#76B7B2"
      )
      component_suffixes <- c("_real_estate", "_professional", "_financial", "_other")

      shades_by_origin <- lapply(names(origin_hues), function(origin) {
        # Single-bracket indexing, so the base color keeps its origin name
        hue <- origin_hues[origin]
        shades <- list(
          colorspace::darken(hue, 0.3),   # real estate: darker variant
          hue,                            # professional: base color
          colorspace::lighten(hue, 0.3),  # financial: lighter variant
          colorspace::lighten(hue, 0.6)   # other: lightest variant
        )
        names(shades) <- paste0(origin, component_suffixes)
        shades
      })

      # Concatenate the per-origin lists into one flat named list
      do.call(c, shades_by_origin)
    }
    
    # Draw mean wealth over time as one line per origin group, with a small
    # pie at every (year, group) point showing how that wealth splits into
    # real estate, financial, professional and other assets.
    #
    # composition_data: data frame accepted by prepare_scatterpie_data().
    # Returns: a ggplot object.
    plot_wealth_composition_scatterpie <- function(composition_data) {
      plot_data <- prepare_scatterpie_data(composition_data)

      # Pie radius: square root of wealth relative to the largest value,
      # times 10 (scaled down again inside aes() below)
      max_wealth <- max(plot_data$mean_wealth, na.rm = TRUE)
      plot_data$pie_size <- sqrt(plot_data$mean_wealth / max_wealth) * 10

      # Share of each component in total wealth, rescaled so the four shares
      # sum to 1. Plain column arithmetic is already row-wise, so no
      # rowwise() pass is needed.
      plot_data <- plot_data %>%
        mutate(
          r_real_estate = mean_real_estate / mean_wealth,
          r_financial = mean_financial / mean_wealth,
          r_professional = mean_professional / mean_wealth,
          r_other = mean_other / mean_wealth,
          total_ratio = r_real_estate + r_financial + r_professional + r_other,
          r_real_estate = r_real_estate / total_ratio,
          r_financial = r_financial / total_ratio,
          r_professional = r_professional / total_ratio,
          r_other = r_other / total_ratio
        )

      # Scale transformation that divides by `value` (and multiplies back for
      # the inverse), so the axis still shows the original wealth values
      divby <- function(value) {
        scales::new_transform(
          paste0("divby-", value),
          function(x) x / value,
          function(x) x * value
        )
      }

      ggplot() +
        geom_line(
          data = plot_data,
          aes(x = year, y = mean_wealth, group = origin_group, color = origin_group),
          # `linewidth` replaces the `size` aesthetic for lines, which was
          # deprecated in ggplot2 3.4.0
          linewidth = 1.2
        ) +
        geom_scatterpie(
          data = plot_data,
          aes(
            x = year, y = mean_wealth, group = origin_group,
            # Shrink the radius so the largest pie has radius 1 in plot units
            r = pie_size * .1
          ),
          cols = c("r_real_estate", "r_financial", "r_professional", "r_other"),
          alpha = 0.8
        ) +
        scale_color_manual(values = c(
          "Native" = "#1f77b4",
          "Europe" = "#4E79A7",
          "North Africa" = "#F28E2B",
          "Sub-Saharan Africa" = "#E15759",
          "Other" = "#76B7B2"
        )) +
        scale_y_continuous(
          # Dividing wealth by 30000 brings the y range close to the x range
          # (years), which keeps the pies readable under coord_fixed()
          trans = divby(30000),
          labels = scales::label_number(scale_cut = scales::cut_short_scale()),
          limits = c(0, max(plot_data$mean_wealth) * 1.2),
          expand = expansion(mult = c(0, 0.2))
        ) +
        scale_x_continuous(breaks = unique(plot_data$year)) +
        labs(
          x = "Year",
          y = "Average Gross Wealth",
          color = "Origin"
        ) +
        theme_minimal() +
        theme(
          legend.position = "bottom",
          panel.grid.minor = element_blank(),
          axis.title = element_text(face = "bold"),
          plot.title = element_text(size = 14, face = "bold"),
          plot.subtitle = element_text(size = 11)
        ) +
        guides(
          # The guide sets its own legend title; thicker legend lines are
          # requested through `linewidth`, the aesthetic lines now use
          color = guide_legend(
            title = "Origine",
            override.aes = list(linewidth = 3)
          )
        ) +
        # Equal units on both axes, so the pies are drawn as circles
        coord_fixed()
    }
    
    scatterpie_wealth_plot <- plot_wealth_composition_scatterpie(composition_by_origin)
    print(scatterpie_wealth_plot)