javascriptrsankey-diagramhtmlwidgetsnetworkd3

How do I set some custom colors to link when I use sankeyNetwork?


I used the sankeyNetwork function from the networkD3 R package to draw a sankey plot, aiming to show the changes in the number of cell types at different levels, but my custom colors don't match the cell type I set.

I used

library(networkD3)

source <- c("Epithelial", "Epithelial", "Endothelial", "Stroma", "Endothelial", "Immune", 
            "Immune", "Proliferating", "Stroma", "Fibroblast lineage", "Fibroblast lineage", 
            "Alveolar epithelium", "Alveolar epithelium", "Lymphoid", "Airway epithelium", 
            "CEC1", "Airway epithelium", "Airway epithelium", "LEC1", "Myeloid1", "Myeloid1", 
            "Myeloid1", "Myeloid1", "Proliferating2", "SM1", "Lymphoid")
target <- c("Airway epithelium", "Alveolar epithelium", "CEC1", "Fibroblast lineage", 
            "LEC1", "Lymphoid", "Myeloid1", "Proliferating2", "SM1", "Alf1", "Alf2", "AT1", 
            "AT2", "B", "Basal", "CEC2", "Ciliated", "Club", "LEC2", "Macrophages", "MAST", 
            "Monocytes", "Myeloid2", "Proliferating3", "SM2", "T_cell")
value <- c(14612, 18191, 15878, 21459, 4131, 30553, 38800, 743, 10607, 9946, 11513, 11568, 
           6623, 2436, 390, 15878, 11449, 2773, 4131, 11989, 2802, 13628, 10381, 743, 10607, 28117)
source2 <- c(0, 0, 1, 2, 1, 3, 3, 4, 2, 5, 5, 6, 6, 7, 8, 9, 8, 8, 10, 11, 11, 11, 11, 12, 13, 7)
target2 <- c(8, 6, 9, 5, 10, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
group <- c("Epithelial", "Epithelial", "Endothelial", "Stroma", "Endothelial", "Immune", 
           "Immune", "Proliferating", "Stroma", "Fibroblast lineage", "Fibroblast lineage", 
           "Alveolar epithelium", "Alveolar epithelium", "Lymphoid", "Airway epithelium", 
           "CEC1", "Airway epithelium", "Airway epithelium", "LEC1", "Myeloid1", "Myeloid1", 
           "Myeloid1", "Myeloid1", "Proliferating2", "SM1", "Lymphoid")


flows <- data.frame(source, target, value, source2, target2, group)
nodes <- data.frame(name = unique(c(flows$source, flows$target)))

colourScale <- JS(
'd3.scaleOrdinal()
    .domain(["Epithelial", "Endothelial", "Stroma", "Immune", 
             "Alveolar epithelium", "Airway epithelium",
             "AT1", "AT2", "Basal", "Ciliated",
             "Fibroblast lineage", "SM1",
             "Alf1", "Alf2", "SM2",
             "CEC1", "LEC1",
             "CEC2", "LEC2",
             "Myeloid1", "Lymphoid",
             "B", "Macrophages", "MAST", "Monocytes", "Myeloid2", "T_cell",
             "Proliferating", "Proliferating2", "Proliferating3"])
    .range(["#3a86ff", "#ffbe0b", "#8338ec", "#d62828", 
            "#0081a7", "#0081a7",
            "#a8dadc", "#a8dadc", "#a8dadc", "#a8dadc",
            "#9d4edd", "#9d4edd",
            "#cdb4db", "#cdb4db", "#cdb4db",
            "#ffffb3", "#ffffb3",
            "#ffffb3", "#ffffb3",
            "#e76f51", "#e76f51",
            "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072",
            "#a98467", "#a98467", "#a98467"])'
)

sankeyNetwork(
  Links = flows, 
  Nodes = nodes,
  Source = "source2", 
  Target = "target2", 
  Value = "value", 
  NodeID = "name", 
  units = "T",
  fontSize = 12, 
  nodeWidth = 30,
  LinkGroup = "group",       # link group
  # NodeGroup = "target",       # node group
  colourScale = colourScale  # set color scale
)

The plot isn't as expected.

enter image description here


Solution

  • The problem is that the underlying D3/JavaScript only considers the group names until the first space, so the groups with a space in their name, i.e. "Alveolar epithelium", "Airway epithelium" and "Fibroblast lineage", do not process as expected.

    You could fix that easily by swapping the space for a "_" in the flows and nodes dataframes and the colourScale JS code like so (though I'd strongly suggest just fixing it in the original data rather than post-processing it like this)

    flows$group <- gsub(" ", "_", flows$group)
    nodes$group <- gsub(" ", "_", nodes$name)
    colourScale <- gsub("([[:alpha:]]) ([[:alpha:]])", "\\1_\\2", colourScale)
    
    sankeyNetwork(
      Links = flows, 
      Nodes = nodes,
      Source = "source2", 
      Target = "target2", 
      Value = "value", 
      NodeID = "name", 
      units = "T",
      fontSize = 12, 
      nodeWidth = 30,
      LinkGroup = "group",       # link group
      NodeGroup = "group",       # node group
      colourScale = colourScale  # set color scale
    )
    

    Additionally, I would strongly suggest adding the color data directly to your flows and nodes data frames and then using a simple JS identity function to access them rather than the complicated JS domain and range specification, like so

    colors <- 
      data.frame(
        group = c("Epithelial", "Endothelial", "Stroma", "Immune", 
                  "Alveolar epithelium", "Airway epithelium",
                  "AT1", "AT2", "Basal", "Ciliated",
                  "Fibroblast lineage", "SM1",
                  "Alf1", "Alf2", "SM2",
                  "CEC1", "LEC1",
                  "CEC2", "LEC2",
                  "Myeloid1", "Lymphoid",
                  "B", "Macrophages", "MAST", "Monocytes", "Myeloid2", "T_cell",
                  "Proliferating", "Proliferating2", "Proliferating3"),
        color = c("#3a86ff", "#ffbe0b", "#8338ec", "#d62828", 
                  "#0081a7", "#0081a7",
                  "#a8dadc", "#a8dadc", "#a8dadc", "#a8dadc",
                  "#9d4edd", "#9d4edd",
                  "#cdb4db", "#cdb4db", "#cdb4db",
                  "#ffffb3", "#ffffb3",
                  "#ffffb3", "#ffffb3",
                  "#e76f51", "#e76f51",
                  "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072", "#f7a072",
                  "#a98467", "#a98467", "#a98467")
      )
    
    flows$color <- colors$color[match(flows$group, colors$group)]
    nodes$color <- colors$color[match(nodes$name, colors$group)]
    
    sankeyNetwork(
      Links = flows, 
      Nodes = nodes,
      Source = "source2", 
      Target = "target2", 
      Value = "value", 
      NodeID = "name", 
      units = "T",
      fontSize = 12, 
      nodeWidth = 30,
      LinkGroup = "color",
      NodeGroup = "color",
      colourScale = "f => f"
    )