Here is a fully reproducible example of my code, using fake data:
library(dplyr)
library(ggplot2)
library(scatterpie)
library(colorspace)
set.seed(123) # fixed seed so the simulated figures are reproducible
years <- c(1998, 2004, 2010, 2014, 2017, 2020)
origins <- c("Native", "Europe", "North Africa", "Sub-Saharan Africa", "Other")

# One row per (year, origin group) pair. The rnorm() calls are evaluated in the
# order written below, so that order decides which draws each column receives.
composition_by_origin <- expand.grid(year = years, origin_group = origins) %>%
  mutate(
    # Mean total wealth per group and year: linear trend plus Gaussian noise
    mean_wealth = case_when(
      origin_group == "Native" ~
        200000 + (year - 1998) * 8000 + rnorm(n(), 0, 10000),
      origin_group == "Europe" ~
        150000 + (year - 1998) * 7000 + rnorm(n(), 0, 9000),
      origin_group == "North Africa" ~
        80000 + (year - 1998) * 4000 + rnorm(n(), 0, 5000),
      origin_group == "Sub-Saharan Africa" ~
        60000 + (year - 1998) * 3000 + rnorm(n(), 0, 4000),
      origin_group == "Other" ~
        100000 + (year - 1998) * 5000 + rnorm(n(), 0, 7000)
    ),
    # Real-estate component: a noisy, group-specific share of total wealth
    mean_real_estate = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.55 + rnorm(n(), 0, 0.05)),
      origin_group == "Europe" ~
        mean_wealth * (0.50 + rnorm(n(), 0, 0.05)),
      origin_group == "North Africa" ~
        mean_wealth * (0.65 + rnorm(n(), 0, 0.05)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.70 + rnorm(n(), 0, 0.05)),
      origin_group == "Other" ~
        mean_wealth * (0.60 + rnorm(n(), 0, 0.05))
    ),
    # Financial component
    mean_financial = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.25 + rnorm(n(), 0, 0.03)),
      origin_group == "Europe" ~
        mean_wealth * (0.30 + rnorm(n(), 0, 0.03)),
      origin_group == "North Africa" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.03)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.03)),
      origin_group == "Other" ~
        mean_wealth * (0.20 + rnorm(n(), 0, 0.03))
    ),
    # Professional component
    mean_professional = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "Europe" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "North Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Other" ~
        mean_wealth * (0.12 + rnorm(n(), 0, 0.02))
    ),
    # Residual component, clamped at zero in case the three simulated shares
    # add up to more than the total
    mean_other = pmax(
      mean_wealth - (mean_real_estate + mean_financial + mean_professional),
      0
    )
  )
# Reduce the composition table to the columns the chart uses and drop rows
# whose mean wealth is missing or not strictly positive.
#
# composition_data: data frame containing at least the columns listed in
#   `kept_columns` below.
# Returns the filtered data frame with exactly those columns, in that order.
prepare_scatterpie_data <- function(composition_data) {
  kept_columns <- c(
    "year",
    "origin_group",
    "mean_wealth",
    "mean_real_estate",
    "mean_financial",
    "mean_professional",
    "mean_other"
  )

  composition_data %>%
    select(all_of(kept_columns)) %>%
    # Two conditions passed to filter() are combined with a logical AND
    filter(!is.na(mean_wealth), mean_wealth > 0)
}
# Build a lookup of colour shades, four per origin group.
#
# Returns a named list whose keys are "<origin>_real_estate",
# "<origin>_professional", "<origin>_financial" and "<origin>_other" (in that
# order for each origin). The values are the origin's base colour darkened by
# 0.3, unchanged, lightened by 0.3 and lightened by 0.6 respectively.
create_color_palette <- function() {
  origin_hues <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )
  shade_suffixes <- c("_real_estate", "_professional", "_financial", "_other")

  # Shades for a single origin, keyed by "<origin><suffix>"
  shades_for <- function(origin) {
    hue <- origin_hues[origin]
    shades <- list(
      colorspace::darken(hue, 0.3),  # dark variant
      hue,                           # base colour
      colorspace::lighten(hue, 0.3), # light variant
      colorspace::lighten(hue, 0.6)  # very light variant
    )
    names(shades) <- paste0(origin, shade_suffixes)
    shades
  }

  # Concatenate the per-origin lists into one flat named list
  do.call(c, lapply(names(origin_hues), shades_for))
}
# Draw mean wealth over time as one line per origin group, with a pie of the
# wealth composition placed on every (year, mean wealth) point.
#
# composition_data: data frame accepted by prepare_scatterpie_data().
# Returns a ggplot object.
plot_wealth_composition_scatterpie <- function(composition_data) {
  # Line colour for each origin group
  line_palette <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )
  # Columns holding the pie slices (shares that sum to 1 per row)
  share_columns <- c("r_real_estate", "r_financial", "r_professional", "r_other")

  pies <- prepare_scatterpie_data(composition_data)
  shade_lookup <- create_color_palette()

  # Pie radius grows with the square root of wealth, relative to the maximum
  top_wealth <- max(pies$mean_wealth, na.rm = TRUE)
  pies$pie_size <- sqrt(pies$mean_wealth / top_wealth) * 10

  # Express each component as a share of total wealth, then rescale the four
  # shares row by row so that they sum to exactly 1
  pies <- pies %>%
    rowwise() %>%
    mutate(
      r_real_estate = mean_real_estate / mean_wealth,
      r_financial = mean_financial / mean_wealth,
      r_professional = mean_professional / mean_wealth,
      r_other = mean_other / mean_wealth,
      total_ratio = sum(r_real_estate, r_financial, r_professional, r_other),
      r_real_estate = r_real_estate / total_ratio,
      r_financial = r_financial / total_ratio,
      r_professional = r_professional / total_ratio,
      r_other = r_other / total_ratio
    ) %>%
    ungroup()

  # Four shades per origin group; assembled here but not referenced by any of
  # the layers below
  origin_names <- as.character(unique(pies$origin_group))
  group_colors <- lapply(origin_names, function(origin) {
    c(
      shade_lookup[[paste0(origin, "_real_estate")]],
      shade_lookup[[paste0(origin, "_financial")]],
      shade_lookup[[paste0(origin, "_professional")]],
      shade_lookup[[paste0(origin, "_other")]]
    )
  })
  names(group_colors) <- origin_names

  trend_layer <- geom_line(
    data = pies,
    aes(x = year, y = mean_wealth, group = origin_group, color = origin_group),
    size = 1.2
  )
  pie_layer <- geom_scatterpie(
    data = pies,
    aes(x = year, y = mean_wealth, group = origin_group, r = pie_size),
    cols = share_columns,
    alpha = 0.8
  )
  wealth_axis <- scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale()),
    limits = c(0, max(pies$mean_wealth) * 1.2),
    expand = expansion(mult = c(0, 0.2))
  )
  chart_theme <- theme_minimal() +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold"),
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 11)
    )
  origin_legend <- guide_legend(
    title = "Origine",
    override.aes = list(size = 3)
  )

  ggplot() +
    trend_layer +
    pie_layer +
    scale_color_manual(values = line_palette) +
    wealth_axis +
    scale_x_continuous(breaks = unique(pies$year)) +
    labs(
      x = "Year",
      y = "Average Gross Wealth",
      color = "Origin"
    ) +
    chart_theme +
    guides(color = origin_legend)
}
# Build the chart from the simulated data, then render it
scatterpie_wealth_plot <- composition_by_origin %>%
  plot_wealth_composition_scatterpie()
print(scatterpie_wealth_plot)
If you run this R code from scratch, you'll notice that lines are drawn instead of pie charts. My goal is to show, at each point, the average wealth composition (financial, professional and real estate wealth) for each immigrant group. However, for a reason I don't know, the pie charts appear as lines. I know it has to do either with the radius or with the scale of my y axis, but every time I try to make changes, the pie charts become either gigantic or stretched horizontally or vertically.
My point is just to have small pie charts at each point. Is this possible?
As you already guessed, the main issue is that the scales of the variable mapped on `y` and the variable mapped on `x` differ considerably. One option to fix that would be to use a custom transformation for the y scale. Additionally, I reduced the `pie_size`. Finally, note that `geom_scatterpie` requires `coord_fixed` to get circles:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(scatterpie)
#> scatterpie v0.2.4 Learn more at https://yulab-smu.top/
library(colorspace)
set.seed(123) # fixed seed so the simulated figures are reproducible
years <- c(1998, 2004, 2010, 2014, 2017, 2020)
origins <- c("Native", "Europe", "North Africa", "Sub-Saharan Africa", "Other")

# One row per (year, origin group) pair. The rnorm() calls are evaluated in the
# order written below, so that order decides which draws each column receives.
composition_by_origin <- expand.grid(year = years, origin_group = origins) %>%
  mutate(
    # Mean total wealth per group and year: linear trend plus Gaussian noise
    mean_wealth = case_when(
      origin_group == "Native" ~
        200000 + (year - 1998) * 8000 + rnorm(n(), 0, 10000),
      origin_group == "Europe" ~
        150000 + (year - 1998) * 7000 + rnorm(n(), 0, 9000),
      origin_group == "North Africa" ~
        80000 + (year - 1998) * 4000 + rnorm(n(), 0, 5000),
      origin_group == "Sub-Saharan Africa" ~
        60000 + (year - 1998) * 3000 + rnorm(n(), 0, 4000),
      origin_group == "Other" ~
        100000 + (year - 1998) * 5000 + rnorm(n(), 0, 7000)
    ),
    # Real-estate component: a noisy, group-specific share of total wealth
    mean_real_estate = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.55 + rnorm(n(), 0, 0.05)),
      origin_group == "Europe" ~
        mean_wealth * (0.50 + rnorm(n(), 0, 0.05)),
      origin_group == "North Africa" ~
        mean_wealth * (0.65 + rnorm(n(), 0, 0.05)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.70 + rnorm(n(), 0, 0.05)),
      origin_group == "Other" ~
        mean_wealth * (0.60 + rnorm(n(), 0, 0.05))
    ),
    # Financial component
    mean_financial = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.25 + rnorm(n(), 0, 0.03)),
      origin_group == "Europe" ~
        mean_wealth * (0.30 + rnorm(n(), 0, 0.03)),
      origin_group == "North Africa" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.03)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.03)),
      origin_group == "Other" ~
        mean_wealth * (0.20 + rnorm(n(), 0, 0.03))
    ),
    # Professional component
    mean_professional = case_when(
      origin_group == "Native" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "Europe" ~
        mean_wealth * (0.15 + rnorm(n(), 0, 0.02)),
      origin_group == "North Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Sub-Saharan Africa" ~
        mean_wealth * (0.10 + rnorm(n(), 0, 0.02)),
      origin_group == "Other" ~
        mean_wealth * (0.12 + rnorm(n(), 0, 0.02))
    ),
    # Residual component, clamped at zero in case the three simulated shares
    # add up to more than the total
    mean_other = pmax(
      mean_wealth - (mean_real_estate + mean_financial + mean_professional),
      0
    )
  )
# Reduce the composition table to the columns the chart uses and drop rows
# whose mean wealth is missing or not strictly positive.
#
# composition_data: data frame containing at least the columns listed in
#   `kept_columns` below.
# Returns the filtered data frame with exactly those columns, in that order.
prepare_scatterpie_data <- function(composition_data) {
  kept_columns <- c(
    "year",
    "origin_group",
    "mean_wealth",
    "mean_real_estate",
    "mean_financial",
    "mean_professional",
    "mean_other"
  )

  composition_data %>%
    select(all_of(kept_columns)) %>%
    # Two conditions passed to filter() are combined with a logical AND
    filter(!is.na(mean_wealth), mean_wealth > 0)
}
# Build a lookup of colour shades, four per origin group.
#
# Returns a named list whose keys are "<origin>_real_estate",
# "<origin>_professional", "<origin>_financial" and "<origin>_other" (in that
# order for each origin). The values are the origin's base colour darkened by
# 0.3, unchanged, lightened by 0.3 and lightened by 0.6 respectively.
create_color_palette <- function() {
  origin_hues <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )
  shade_suffixes <- c("_real_estate", "_professional", "_financial", "_other")

  # Shades for a single origin, keyed by "<origin><suffix>"
  shades_for <- function(origin) {
    hue <- origin_hues[origin]
    shades <- list(
      colorspace::darken(hue, 0.3),  # dark variant
      hue,                           # base colour
      colorspace::lighten(hue, 0.3), # light variant
      colorspace::lighten(hue, 0.6)  # very light variant
    )
    names(shades) <- paste0(origin, shade_suffixes)
    shades
  }

  # Concatenate the per-origin lists into one flat named list
  do.call(c, lapply(names(origin_hues), shades_for))
}
# Draw mean wealth over time as one line per origin group, with a small pie of
# the wealth composition placed on every (year, mean wealth) point.
#
# composition_data: data frame accepted by prepare_scatterpie_data().
# y_divisor: positive number by which wealth is divided on the y scale. Pies
#   are drawn in transformed coordinates, so this brings the y range to an
#   order of magnitude comparable with the x range (years); combined with
#   coord_fixed() it keeps the pies circular and small. Axis labels still show
#   the original values. Defaults to 30000.
# Returns a ggplot object.
plot_wealth_composition_scatterpie <- function(composition_data,
                                               y_divisor = 30000) {
  stopifnot(
    is.numeric(y_divisor),
    length(y_divisor) == 1L,
    !is.na(y_divisor),
    y_divisor > 0
  )

  # Line colour for each origin group
  origin_colors <- c(
    "Native" = "#1f77b4",
    "Europe" = "#4E79A7",
    "North Africa" = "#F28E2B",
    "Sub-Saharan Africa" = "#E15759",
    "Other" = "#76B7B2"
  )
  # Columns holding the pie slices
  share_cols <- c("r_real_estate", "r_financial", "r_professional", "r_other")

  # Scale transformation that divides by `value` (and multiplies back for the
  # inverse, which is what the axis labels are computed from)
  divby <- function(value) {
    scales::new_transform(
      paste0("divby-", value),
      function(x) x / value,
      function(x) x * value
    )
  }

  plot_data <- prepare_scatterpie_data(composition_data)

  # Pie size grows with the square root of wealth, relative to the maximum
  max_wealth <- max(plot_data$mean_wealth, na.rm = TRUE)
  plot_data$pie_size <- sqrt(plot_data$mean_wealth / max_wealth) * 10

  # Slice shares: each component divided by the sum of the four components, so
  # the shares of a row always sum to 1. Plain vectorised arithmetic; no
  # row-wise grouping is needed.
  plot_data <- plot_data %>%
    mutate(
      component_total = mean_real_estate + mean_financial +
        mean_professional + mean_other,
      r_real_estate = mean_real_estate / component_total,
      r_financial = mean_financial / component_total,
      r_professional = mean_professional / component_total,
      r_other = mean_other / component_total
    )

  ggplot() +
    geom_line(
      data = plot_data,
      aes(x = year, y = mean_wealth, group = origin_group, color = origin_group),
      # `linewidth` replaces `size` for lines since ggplot2 3.4.0
      linewidth = 1.2
    ) +
    geom_scatterpie(
      data = plot_data,
      aes(
        x = year, y = mean_wealth, group = origin_group,
        # Reduced radius so that the pies stay small next to the lines
        r = pie_size * .1
      ),
      cols = share_cols,
      alpha = 0.8
    ) +
    scale_color_manual(values = origin_colors) +
    scale_y_continuous(
      trans = divby(y_divisor),
      labels = scales::label_number(scale_cut = scales::cut_short_scale()),
      limits = c(0, max_wealth * 1.2),
      expand = expansion(mult = c(0, 0.2))
    ) +
    scale_x_continuous(breaks = unique(plot_data$year)) +
    labs(
      x = "Year",
      y = "Average Gross Wealth",
      # The legend title set in guides() below takes precedence over this one
      color = "Origin"
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      axis.title = element_text(face = "bold"),
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 11)
    ) +
    guides(
      color = guide_legend(
        title = "Origine",
        # Thicker line samples in the legend keys
        override.aes = list(linewidth = 3)
      )
    ) +
    # A 1:1 aspect ratio is what makes the pies render as circles
    coord_fixed()
}

scatterpie_wealth_plot <- plot_wealth_composition_scatterpie(composition_by_origin)
print(scatterpie_wealth_plot)