I have two data frames (df1 and its long-format version, df1_long),
as shown below.
# Example data: one row per event sequence (19 rows).
#   Sequence   - events in the sequence, separated by ">"
#   Proportion - share of the sequence (shown as a percentage in the plot)
#   Order      - sort key used when the sequences are ordered for plotting
# Every remaining column is named after an event (or a combination of events)
# and holds the value plotted for it; NA means there is no value for that
# event in that sequence.
df1 <- data.frame(
  Sequence = c(
    "ABC>EFGHI",
    "ABC>NOPQ",
    "ABC>JKLM",
    "ABC>RSTUV",
    "ABC>EFGHI>NOPQ",
    "ABC>NOPQ>EFGHI",
    "ABC>NOPQ>RSTUV",
    "ABC>EFGHI>RSTUV",
    "TD2>EFGHI>JKLM",
    "ABC>JKLM>EFGHI",
    "ABC>EFGHI>NOPQ>RSTUV",
    "ABC>NOPQ>EFGHI>RSTUV",
    "ABC>JKLM>NOPQ",
    "ABC>NOPQ>JKLM",
    "ABC>JKLM>RSTUV",
    "ABC>JKLM>NOPQ>RSTUV",
    "ABC>EFGHI>JKLM>RSTUV",
    "ABC>JKLM>EFGHI>RSTUV",
    "ABC>NOPQ>JKLM>RSTUV"
  ),
  Proportion = c(
    21.05, 8.4, 5.35, 4.36, 2.87, 2.48, 1.52, 1.27, 1.04, 0.94,
    0.66, 0.53, 0.44, 0.36, 0.31, 0.11, 0.07, 0.06, 0.06
  ),
  Order = c(
    1, 3, 2, 4, 6, 11, 13, 7, 5, 8,
    15, 18, 9, 12, 10, 17, 14, 16, 19
  ),
  `ABC, NOPQ and JKLM` = c(
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, NA, NA, NA, NA, NA, 75
  ),
  `ABC and EFGHI` = c(
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, NA, NA, NA, 66, NA, NA
  ),
  `NOPQ and RSTUV` = c(
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, NA, NA, 70, NA, NA, NA
  ),
  RSTUV = c(
    NA, NA, NA, 71, NA, NA, 76, 71, NA, NA,
    75, 78, NA, NA, 61, NA, 70, 78, 77
  ),
  NOPQ = c(
    NA, 66, NA, NA, 66, 65, 74, NA, NA, NA,
    73, 74, 60, 60, NA, NA, NA, NA, NA
  ),
  JKLM = c(
    NA, NA, 51, NA, NA, NA, NA, NA, 56, 52,
    NA, NA, 56, 62, 59, 68, 67, 71, NA
  ),
  EFGHI = c(
    59, NA, NA, NA, 63, 67, NA, 69, 54, 54,
    72, 76, NA, NA, NA, NA, NA, 76, NA
  ),
  ABC = c(
    56, 63, 48, 69, 61, 63, 72, 66, 53, 50,
    71, 73, 54, 58, 58, 66, NA, 68, NA
  ),
  # Keep the non-syntactic column names (spaces, commas) exactly as written.
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Reshape df1 to long format: one row per (Sequence, event) pair that actually
# has a value. The event name goes to `event`, the number to `value`.
df1_long <- df1 %>%
  pivot_longer(
    cols = -c(Sequence, Proportion, Order),
    names_to = "event",
    values_drop_na = TRUE
  ) %>%
  arrange(Order, value) %>%
  # Fix the factor levels in their current (Order-sorted) appearance order so
  # the later re-sort of the rows does not change the level order.
  mutate(Sequence = fct_inorder(Sequence)) %>%
  arrange(desc(Proportion))
This is my code and plot. How can I fix the labels that overlap inside the plot (e.g. 'ABC, NOPQ and JKLM' with 'RSTUV', and 'ABC and EFGHI' with 'JKLM')?
I have tried ggrepel, using geom_text_repel(aes(label = event)), without success.
Also, how can I increase the space between the lines inside the plot so that it does not look so busy?
# Draw one horizontal path per sequence: a point for every event at its value,
# joined by an arrowed line, with the event name and the value printed at each
# point and the sequence's proportion printed at the right-hand side.
df1_long %>%
  ggplot(aes(value, Sequence, color = event)) +
  geom_path(
    aes(group = Sequence),
    linewidth = 1.0,
    arrow = arrow(length = unit(5, "pt"))
  ) +
  geom_point() +
  # Event name, top-aligned to the point (vjust = 1), i.e. drawn below it.
  geom_label(aes(label = event),
    vjust = 1, fill = NA, label.size = 0,
    label.padding = unit(8, "pt"),
    color = "black"
  ) +
  # Numeric value, bottom-aligned to the point (vjust = 0), i.e. drawn above it.
  geom_label(aes(label = value),
    vjust = 0, fill = NA, label.size = 0,
    label.padding = unit(8, "pt"),
    color = "black"
  ) +
  # One percentage per sequence, taken from the wide data and placed at a
  # fixed x position.
  geom_text(
    data = df1,
    aes(label = scales::percent(Proportion, scale = 1, accuracy = 0.01)),
    x = 87,
    color = "black", hjust = "inward"
  ) +
  # A plot can hold only one x scale: adding scale_x_continuous() twice makes
  # ggplot2 replace the first call, which silently dropped the `breaks`
  # setting. Both settings therefore live in a single call.
  # NOTE(review): the original `breaks = c(45, 85, 15)` looks like a mistyped
  # seq(from, to, by); confirm the intended tick positions.
  scale_x_continuous(
    breaks = seq(45, 85, by = 15),
    # No extra space on the left beyond 5 %; 5 % plus 5 units on the right.
    expand = c(0.05, 0, 0.05, 5)
  ) +
  scale_color_brewer(type = "qual", palette = 8) +
  guides(color = "none") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  labs(
    x = "Age",
    y = "Sequence"
  )
Two suggestions would be to change the "and"s into "+"s and to reduce the size of the labels. This goes part of the way; using ggrepel then takes care of the last few overlaps:
# Shorten the event labels (" and " -> "+"), draw them smaller, and let
# ggrepel move the labels that would still overlap.
df1_long %>%
  mutate(event = str_replace_all(event, " and ", "+")) %>%
  ggplot(aes(value, Sequence, color = event)) +
  geom_path(
    aes(group = Sequence),
    linewidth = 1.0,
    arrow = arrow(length = unit(5, "pt"))
  ) +
  geom_point() +
  # No trailing comma after the last argument: an empty trailing argument
  # would be forwarded through `...` inside geom_text_repel().
  ggrepel::geom_text_repel(
    aes(label = event),
    vjust = 2, color = "black", angle = 0, size = 3,
    direction = "both", force_pull = 1
  ) +
  # `...` is a placeholder for the remaining layers of the original plot
  # (value labels, percentages, scales, theme); replace it before running.
  ...
One other note: geom_text is a lot quicker than geom_label, so unless you actually need the label box drawn behind the text, use geom_text instead.