I used the sankeyNetwork
function from the networkD3
R package to draw a sankey plot, aiming to show the changes in the number of cell types at different levels, but my custom colors don't match the cell type I set.
I used
library(networkD3)
source <- c("Epithelial", "Epithelial", "Endothelial", "Stroma", "Endothelial", "Immune",
"Immune", "Proliferating", "Stroma", "Fibroblast lineage", "Fibroblast lineage",
"Alveolar epithelium", "Alveolar epithelium", "Lymphoid", "Airway epithelium",
"CEC1", "Airway epithelium", "Airway epithelium", "LEC1", "Myeloid1", "Myeloid1",
"Myeloid1", "Myeloid1", "Proliferating2", "SM1", "Lymphoid")
target <- c("Airway epithelium", "Alveolar epithelium", "CEC1", "Fibroblast lineage",
"LEC1", "Lymphoid", "Myeloid1", "Proliferating2", "SM1", "Alf1", "Alf2", "AT1",
"AT2", "B", "Basal", "CEC2", "Ciliated", "Club", "LEC2", "Macrophages", "MAST",
"Monocytes", "Myeloid2", "Proliferating3", "SM2", "T_cell")
value <- c(14612, 18191, 15878, 21459, 4131, 30553, 38800, 743, 10607, 9946, 11513, 11568,
6623, 2436, 390, 15878, 11449, 2773, 4131, 11989, 2802, 13628, 10381, 743, 10607, 28117)
source2 <- c(0, 0, 1, 2, 1, 3, 3, 4, 2, 5, 5, 6, 6, 7, 8, 9, 8, 8, 10, 11, 11, 11, 11, 12, 13, 7)
target2 <- c(8, 6, 9, 5, 10, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
group <- c("Epithelial", "Epithelial", "Endothelial", "Stroma", "Endothelial", "Immune",
"Immune", "Proliferating", "Stroma", "Fibroblast lineage", "Fibroblast lineage",
"Alveolar epithelium", "Alveolar epithelium", "Lymphoid", "Airway epithelium",
"CEC1", "Airway epithelium", "Airway epithelium", "LEC1", "Myeloid1", "Myeloid1",
"Myeloid1", "Myeloid1", "Proliferating2", "SM1", "Lymphoid")
flows <- data.frame(source, target, value, source2, target2, group)
nodes <- data.frame(name = unique(c(flows$source, flows$target)))
colourScale <- JS(
'd3.scaleOrdinal()
.domain(["Epithelial", "Endothelial", "Stroma", "Immune",
"Alveolar epithelium", "Airway epithelium",
"AT1", "AT2", "Basal", "Ciliated",
"Fibroblast lineage", "SM1",
"Alf1", "Alf2", "SM2",
"CEC1", "LEC1",
"CEC2", "LEC2",
"Myeloid1", "Lymphoid",
"B", "Macrophages", "MAST", "Monocytes", "Myeloid2", "T_cell",
"Proliferating", "Proliferating2", "Proliferating3"])
.range(["#3a86ff", "#ffbe0b", "#8338ec", "#d62828",
"#0081a7", "#0081a7",
"#a8dadc", "#a8dadc", "#a8dadc", "#a8dadc",
"#9d4edd", "#9d4edd",
"#cdb4db", "#cdb4db", "#cdb4db",
"#ffffb3", "#ffffb3",
"#ffffb3", "#ffffb3",
"#e76f51", "#e76f51",
"#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072",
"#a98467", "#a98467", "#a98467"])'
)
sankeyNetwork(
Links = flows,
Nodes = nodes,
Source = "source2",
Target = "target2",
Value = "value",
NodeID = "name",
units = "T",
fontSize = 12,
nodeWidth = 30,
LinkGroup = "group", # link group
# NodeGroup = "target", # node group
colourScale = colourScale # set color scale
)
The plot isn't as expected.
The problem is that the underlying D3/JavaScript only considers the group names until the first space, so the groups with a space in their name, i.e. "Alveolar epithelium", "Airway epithelium" and "Fibroblast lineage", do not process as expected.
You could fix that easily by swapping the space for a "_" in the flows
and nodes
dataframes and the colourScale
JS code like so (though I'd strongly suggest just fixing it in the original data rather than post-processing it like this)
flows$group <- gsub(" ", "_", flows$group)
nodes$group <- gsub(" ", "_", nodes$name)
colourScale <- gsub("([[:alpha:]]) ([[:alpha:]])", "\\1_\\2", colourScale)
sankeyNetwork(
Links = flows,
Nodes = nodes,
Source = "source2",
Target = "target2",
Value = "value",
NodeID = "name",
units = "T",
fontSize = 12,
nodeWidth = 30,
LinkGroup = "group", # link group
NodeGroup = "group", # node group
colourScale = colourScale # set color scale
)
Additionally, I would strongly suggest adding the color data directly to your flows
and nodes
data frames and then using a simple JS identity function to access them rather than the complicated JS domain and range specification, like so
colors <-
data.frame(
group = c("Epithelial", "Endothelial", "Stroma", "Immune",
"Alveolar epithelium", "Airway epithelium",
"AT1", "AT2", "Basal", "Ciliated",
"Fibroblast lineage", "SM1",
"Alf1", "Alf2", "SM2",
"CEC1", "LEC1",
"CEC2", "LEC2",
"Myeloid1", "Lymphoid",
"B", "Macrophages", "MAST", "Monocytes", "Myeloid2", "T_cell",
"Proliferating", "Proliferating2", "Proliferating3"),
color = c("#3a86ff", "#ffbe0b", "#8338ec", "#d62828",
"#0081a7", "#0081a7",
"#a8dadc", "#a8dadc", "#a8dadc", "#a8dadc",
"#9d4edd", "#9d4edd",
"#cdb4db", "#cdb4db", "#cdb4db",
"#ffffb3", "#ffffb3",
"#ffffb3", "#ffffb3",
"#e76f51", "#e76f51",
"#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072",
"#a98467", "#a98467", "#a98467")
)
flows$color <- colors$color[match(flows$group, colors$group)]
nodes$color <- colors$color[match(nodes$name, colors$group)]
sankeyNetwork(
Links = flows,
Nodes = nodes,
Source = "source2",
Target = "target2",
Value = "value",
NodeID = "name",
units = "T",
fontSize = 12,
nodeWidth = 30,
LinkGroup = "color",
NodeGroup = "color",
colourScale = "f => f"
)