I have a data frame in R called df_ that looks like this:
df_
# A tibble: 40 × 4
# Groups: Year [5]
Year Country mu Color
<int> <fct> <dbl> <fct>
1 2019 ALPHA 68.9 red
2 2019 BETA 64.8 black
3 2019 GAMMA 70.0 yellow
4 2019 RHO 65.2 gray
5 2019 DELTA 70.1 green
6 2019 EPSILON 69.6 pink
7 2019 THETA 69.8 purple
8 2019 OMEGA 67.9 orange
9 2020 ALPHA 69.3 red
10 2020 BETA 65.2 black
# ℹ 30 more rows
I want two things:
a) to draw each line in the color given for its country in the Color column, and b) to display, at the right-hand end of each line, the country name together with its last value (i.e. the value for the year 2024). For example, the label at the end of the ALPHA line should read "ALPHA, 76.4".
How can I do this in R using ggplot2?
# Line chart of mu by Year, one line (plus points) per Country.
# NOTE: because Color is mapped inside aes(), ggplot2 treats it as a categorical
# variable and assigns colours from its default palette; the colour names stored
# in the column are not used literally.
ggplot(df_, aes(x = Year, y = mu, color = Color, group = Country)) +
  # linewidth controls line thickness; using size for lines is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 1.5) +
  geom_point() +
  labs(x = "Years", y = "") +
  theme_minimal() +
  theme(legend.position = "none")
data
# dput() representation of the example data: a tibble with 40 rows and the
# columns Year (integer), Country (factor), mu (double) and Color (factor).
# Its class includes "grouped_df" and the groups attribute lists Year, i.e. the
# data is grouped by Year (5 groups of 8 rows each).
structure(list(Year = c(2019L, 2019L, 2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L, 2020L,
2020L, 2022L, 2022L, 2022L, 2022L, 2022L, 2022L, 2022L, 2022L,
2023L, 2023L, 2023L, 2023L, 2023L, 2023L, 2023L, 2023L, 2024L,
2024L, 2024L, 2024L, 2024L, 2024L, 2024L, 2024L), Country = structure(c(1L,
2L, 5L, 7L, 3L, 4L, 8L, 6L, 1L, 2L, 5L, 7L, 3L, 4L, 8L, 6L, 1L,
2L, 5L, 7L, 3L, 4L, 8L, 6L, 1L, 2L, 5L, 7L, 3L, 4L, 8L, 6L, 1L,
2L, 5L, 7L, 3L, 4L, 8L, 6L), levels = c("ALPHA", "BETA", "DELTA",
"EPSILON", "GAMMA", "OMEGA", "RHO", "THETA"), class = "factor"),
mu = c(68.855, 64.77, 69.9875, 65.22, 70.1266666666667, 69.6166666666667,
69.8085714285714, 67.9093333333333, 69.2675, 65.2, 72.4075,
69.49, 72.28, 69.262, 70.07125, 65.3864285714286, 74.6584615384615,
67.77, 75.3533333333333, 73, 64.09, 73.1715384615385, 66.058,
72.12, 75.5645833333333, 70.46, 78.2933333333333, 79.07,
59.82, 79.6361538461538, 74.225, 69.5871428571429, 76.4007407407407,
67.91, 76.805, 77.31, 74.0966666666667, 81.2811764705882,
74.6671428571428, 78.0316666666667), Color = structure(c(7L,
1L, 8L, 2L, 3L, 5L, 6L, 4L, 7L, 1L, 8L, 2L, 3L, 5L, 6L, 4L,
7L, 1L, 8L, 2L, 3L, 5L, 6L, 4L, 7L, 1L, 8L, 2L, 3L, 5L, 6L,
4L, 7L, 1L, 8L, 2L, 3L, 5L, 6L, 4L), levels = c("black",
"gray", "green", "orange", "pink", "purple", "red", "yellow"
), class = "factor")), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -40L), groups = structure(list(
Year = c(2019L, 2020L, 2022L, 2023L, 2024L), .rows = structure(list(
1:8, 9:16, 17:24, 25:32, 33:40), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
edit
Adding:
# Add a label column holding the country name on the last year's rows only.
# df_ is grouped by Year, so without ungroup() max(Year) would be evaluated
# inside each Year group, the comparison would be TRUE for every row, and every
# point would be labelled instead of just the line ends.
df_ <- df_ |>
  ungroup() |>
  mutate(label = if_else(Year == max(Year), as.character(Country), NA_character_))
in the data frame and in the plot :
# Hide the legend, then add repelled text boxes taken from the label column.
# Rows whose label is NA are removed without a warning (na.rm = TRUE), and each
# label starts shifted one x-unit to the right of its point.
theme(legend.position = "none") +
  geom_label_repel(
    mapping = aes(label = label),
    na.rm = TRUE,
    nudge_x = 1
  )
There are plenty of options.
library(ggplot2)
library(ggrepel)
library(dplyr)
# Line chart of mu by Year with one line per Country. Each line is drawn in the
# colour named in the Color column and labelled at its last point with
# "<Country>, <mu rounded to one decimal>".
X |>
  # Shared aesthetics are declared once here and inherited by every layer.
  ggplot(aes(x = Year, y = mu, colour = Color, group = Country)) +
  # linewidth controls line thickness; size for lines is deprecated since
  # ggplot2 3.4.0.
  geom_line(linewidth = 1.5) +
  geom_point() +
  geom_text_repel(
    # Label only the most recent year. The data may arrive grouped by Year, in
    # which case max(Year) would be computed per group and match every row, so
    # drop the grouping first.
    data = X |>
      ungroup() |>
      filter(Year == max(Year)) |>
      mutate(lbl = sprintf("%s, %.01f", Country, mu)),
    aes(label = lbl),
    nudge_x = .5, size = 2.5
  ) +
  # Use the values in Color as literal colour names; without this scale ggplot2
  # assigns its default palette to the factor levels.
  scale_colour_identity() +
  labs(x = "Years", y = "") +
  theme_minimal() +
  theme(legend.position = "none")
where X
is your data. For example, a reference on SO can be found here.