Using the circlize package in R, my objective is to group a chord diagram by year within each sector. Being new to the package, I started from the beginning by following the examples in the tutorial. According to the vignette, grouping is possible (even when using data.frames) by passing a group argument to the chordDiagram() function. This is described in Chapter 15.6, Multiple-group Chord diagram. Following the vignette, I was able to produce a chord diagram, but I am stuck on how to group it to get my desired result. I have put together a visual example of what I would like the chord diagram to look like here:
As you can see, I aim to have each sector (OOP, UVA, WSE, FIN, MAT, OIC) grouped by the year (which is a column in the input data.frame). I can get the chord diagram, but without the years added.
A reproducible example
Creating a data.frame
# Sector codes, kept as a one-column data.frame so rows can be sampled below.
Types <- data.frame(Types = c("OOP", "UVA", "MAT", "OIC", "FIN", "WSE"))

# Fill colour for each sector code.
Type_Cols <- c(
  OOP = "#548235", UVA = "#660066", MAT = "#4472C4",
  OIC = "#002060", FIN = "#843C0C", WSE = "#C55A11"
)

# One row per invoice: 1 in 2019, 4 in 2020, 7 in 2021, 11 in 2022, 11 in 2023.
invoice_years <- rep(
  c(2019, 2020, 2021, 2022, 2023),
  times = c(1, 4, 7, 11, 11)
)
invoice_ids <- c(
  paste0("2019.", "10", ".INV"),
  paste0("2020.", 20:23, ".INV"),
  paste0("2021.", 30:36, ".INV"),
  paste0("2022.", 40:50, ".INV"),
  paste0("2023.", 50:60, ".INV")
)
stack.df <- data.frame(Year = invoice_years, Invoice = invoice_ids)

# Draw a random origin and destination sector for every invoice. The origin
# is sampled first and the destination second, so the random number stream
# is consumed in that order.
n_invoices <- nrow(stack.df)
org_from <- Types[sample(nrow(Types), n_invoices, replace = TRUE), ]
org_to <- Types[sample(nrow(Types), n_invoices, replace = TRUE), ]
stack.df <- cbind(stack.df, Org_1 = org_from, Org_2 = org_to)
Adding lty & colors for links
# Random line type for every link, drawn from the values 1 and 2.
n_links <- nrow(stack.df)
stack.df$lty <- sample(
  x = rep(c(1, 2), times = n_links),
  size = n_links,
  replace = TRUE
)

# Link colour is determined by the invoice year. A year without an entry in
# the palette falls back to the year itself, coerced to character.
year_palette <- c(
  "2019" = "#D9D9D9",
  "2020" = "#B296B6",
  "2021" = "#FFD966",
  "2022" = "#D5469E",
  "2023" = "#B4C2A7"
)
year_key <- as.character(stack.df$Year)
link_cols <- unname(year_palette[year_key])
unmatched <- is.na(link_cols)
link_cols[unmatched] <- year_key[unmatched]
stack.df$Link_cols <- link_cols
Re-arranging the stack.df
# Move the two sector columns to the front, followed by the invoice metadata.
stack.df <- stack.df[
  ,
  c("Org_1", "Org_2", "Invoice", "Year", "lty", "Link_cols")
]
Graph the Chord Diagram
library(circlize)
# Sectors are laid out in alphabetical order.
sector_order <- sort(union(stack.df$Org_1, stack.df$Org_2))

# Arrow-head colours: black for the first link, white for every other link.
arrow_cols <- c("black", rep("white", nrow(stack.df) - 1))

# Only the first two columns (origin and destination) are passed as links.
chordDiagramFromDataFrame(
  stack.df[, 1:2],
  order = sector_order,
  grid.col = Type_Cols,
  link.lty = stack.df$lty,
  directional = 1,
  direction.type = "arrows",
  link.arr.col = arrow_cols
)
This gives the following chord diagram:
To produce the group parameter, the names in group should cover all sector names. This is where I am stuck: the vignette gives an example using a matrix, but not a data.frame. I have attempted to work around this by writing code like this:
# Attempted group vector: the values are the distinct sector names and the
# names are the distinct years. This is the reverse of the requirement stated
# above (the *names* of `group` should cover all sector names), which is
# presumably why this attempt does not work.
group <- structure(union(stack.df$Org_1, stack.df$Org_2), names = unique(stack.df$Year))
But an approach like this gets me nowhere. Any ideas to get me unstuck? Thank you!
library(dplyr)
# Append the year to both sector columns so that every sector/year pair
# becomes its own sector label (e.g. "OOP" in 2019 becomes "OOP2019").
org_cols <- c("Org_1", "Org_2")
stack.df[org_cols] <- lapply(stack.df[org_cols], paste0, stack.df$Year)
head(stack.df)
Org_1 Org_2 Invoice Year lty Link_cols
1 OOP2019 UVA2019 2019.10.INV 2019 1 #D9D9D9
2 MAT2020 OIC2020 2020.20.INV 2020 1 #B296B6
3 MAT2020 FIN2020 2020.21.INV 2020 2 #B296B6
4 WSE2020 OOP2020 2020.22.INV 2020 2 #B296B6
5 OOP2020 OOP2020 2020.23.INV 2020 1 #B296B6
6 OOP2021 UVA2021 2021.30.INV 2021 2 #FFD966
# Every distinct sector/year label, in alphabetical order.
sectors <- sort(union(stack.df$Org_1, stack.df$Org_2))

# Map each label to its group by stripping all digits (the year part); the
# values are the bare sector codes and the names are the full labels.
df.groups <- setNames(gsub("\\d", "", sectors), sectors)
df.groups
FIN2020 FIN2021 FIN2022 FIN2023 MAT2020 MAT2021 MAT2022 MAT2023 OIC2020 OIC2021 OIC2022 OIC2023 OOP2019
"FIN" "FIN" "FIN" "FIN" "MAT" "MAT" "MAT" "MAT" "OIC" "OIC" "OIC" "OIC" "OOP"
OOP2020 OOP2021 OOP2022 OOP2023 UVA2019 UVA2021 UVA2022 UVA2023 WSE2020 WSE2021 WSE2022 WSE2023
"OOP" "OOP" "OOP" "OOP" "UVA" "UVA" "UVA" "UVA" "WSE" "WSE" "WSE" "WSE"
# Clear the state left by the previous circlize plot before drawing again.
circos.clear()

# Each sector/year label takes the colour of its sector code (the label with
# its digits removed); the colour vector is then named by the full labels.
sector_codes <- gsub("\\d", "", sectors)
sector_cols <- Type_Cols[sector_codes]
names(sector_cols) <- sectors

# Arrow-head colours: black for the first link, white for every other link.
arrow_cols <- c("black", rep("white", nrow(stack.df) - 1))

# Track 1 is pre-allocated (left empty here) so it can be drawn on afterwards.
chordDiagramFromDataFrame(
  select(stack.df, Org_1, Org_2),
  order = sectors,
  grid.col = sector_cols,
  link.lty = stack.df$lty,
  directional = 1,
  direction.type = "arrows",
  link.arr.col = arrow_cols,
  group = df.groups,
  annotationTrack = c("grid", "axis"),
  preAllocateTracks = 1
)
# Write the year (characters 4 to 7 of the sector label) in white at the
# centre of each sector's cell in track 2.
for (sector in get.all.sector.index()) {
  x_range <- get.cell.meta.data("xlim", sector.index = sector, track.index = 2)
  y_range <- get.cell.meta.data("ylim", sector.index = sector, track.index = 2)
  year_label <- substr(sector, 4, 7)
  circos.text(
    x = mean(x_range),
    y = mean(y_range),
    labels = year_label,
    sector.index = sector,
    track.index = 2,
    col = "white",
    cex = .7
  )
}
# Highlight and label each sector group on track 1, using the group's colour
# from Type_Cols and the group code as the label text.
all_sectors <- get.all.sector.index()
for (grp in unique(df.groups)) {
  # Pick the group's sectors by exact membership in df.groups rather than by
  # an unanchored regex on the label, so a group code that happens to be a
  # substring of another label cannot pull in sectors from a different group.
  # Filtering all_sectors keeps the sectors in their plotted order.
  members <- all_sectors[all_sectors %in% names(df.groups)[df.groups == grp]]
  highlight.sector(
    members,
    track.index = 1,
    col = Type_Cols[grp],
    text = grp,
    text.vjust = -1,
    padding = c(-.2, 0, -.5, 0)
  )
}