r plot sankey-diagram riverplot

ordering nodes in riverplot


I'm currently developing a riverplot diagram with the riverplot package. However, I struggle to get my data ordered on the plot. Let me explain with an example:

library(riverplot)
# Node table: one row per node with its ID and its column position x.
df.nodes <- data.frame(
  ID = c(LETTERS[1:9], "K"),
  x = rep(c(1, 2, 3), times = c(4, 4, 2))
)
# Edge table: one row per edge from node N1 to node N2, each with Value 1.
df.edges <- data.frame(
  N1 = LETTERS[1:8],
  N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
  Value = rep(1, times = 8)
)
# Build the riverplot object from the node and edge tables, then draw it.
ex.river <- makeRiver(nodes = df.nodes, edges = df.edges)
plot(ex.river)

enter image description here

As you can see, the edges in this example cross each other. This becomes very confusing when you have more nodes than in my example. My question: do you have any creative idea for how to order the nodes so that I get something like the following? (This is not code, but the node order I would like to see on the graph.)

D   F
C   E   K
B   H   I
A   G

The edges should cross each other as little as possible. As I understand it, df.nodes defines the order of the nodes, so I should reorder df.nodes somehow. Of course I could order df.nodes manually, but with a lot of nodes that gets tricky.

Any ideas are much appreciated.


Solution

  • This was a tricky one; it requires a lot of join operations using data.table. There are possibly smarter solutions. However, this one works for the given data set.

    The basic idea is to sort the nodes and edges from left to right.

    Data

    # Node table: node ID and its integer x position (1, 2 or 3).
    df.nodes <- data.frame(
      ID = c(LETTERS[1:9], "K"),
      x = rep(1:3, times = c(4L, 4L, 2L)),
      stringsAsFactors = FALSE
    )
    # Edge table: one row per edge from node N1 to node N2, each with Value 1L.
    df.edges <- data.frame(
      N1 = LETTERS[1:8],
      N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
      Value = rep(1L, times = 8L),
      stringsAsFactors = FALSE
    )
    
    library(data.table)   # CRAN version 1.10.4 used
    # convert both tables to data.table by reference (no copy is made) ...
    setDT(df.edges)
    setDT(df.nodes)
    # ... and refer to them by shorter names from here on
    edt <- df.edges
    ndt <- df.nodes
    

    Get edge positions

    # add x positions of nodes to edges
    # one join per end point of an edge:
    # first look up the x position of the target node N2 ...
    with_target_x <- ndt[edt, on = c(ID = "N2")]
    # ... then the x position of the source node N1
    edt2 <- ndt[with_target_x, on = c(ID = "N1")]
    # rename the joined columns to source/target names
    setnames(edt2, c("N1", "x1", "N2", "x2", "Value"))
    # number the distinct (x1, x2) combinations from left to right:
    # the same e.pos is shared by all edges between the same pair of x positions
    edt2[order(x1, x2), e.pos := rleid(x1, x2)]
    edt2
    #   N1 x1 N2 x2 Value e.pos
    #1:  A  1  G  2     1     1
    #2:  B  1  H  2     1     1
    #3:  C  1  E  2     1     1
    #4:  D  1  F  2     1     1
    #5:  E  2  K  3     1     2
    #6:  F  2  K  3     1     2
    #7:  G  2  I  3     1     2
    #8:  H  2  I  3     1     2
    

    Sort from left to right

    # initialize: get order of nodes in leftmost x position
    # update edt2 with row number (rn1 = sort order of the source node N1;
    # it is NA for all edges that do not start in the leftmost position)
    edt2 <- ndt[x == 1L, .(N1 = ID, rn1 = .I)][edt2, on = "N1"]
    # edge positions in ascending order, independent of the row order of edt2
    edge_positions <- edt2[, sort(unique(e.pos))]
    # loop over all edge positions except the last one
    # determine row numbers (sort order) for nodes from left to right
    for (p in head(edge_positions, -1L)) {
      # a target node inherits the sort order of its source node
      edt2[p == e.pos, rn2 := rn1]
      # lookup table target node -> sort order; keep one row per node so that
      # nodes reached by several edges do not multiply rows in the join below
      lookup <- unique(edt2[p == e.pos, .(N1 = N2, rn1 = rn2)], by = "N1")
      # hand the sort order over to the edges which start at these nodes;
      # the previous rn1 of edt2 comes back from the join as i.rn1
      edt2 <- lookup[edt2, on = "N1"]
      # keep the previous sort order wherever the lookup found no match
      edt2[is.na(rn1), rn1 := i.rn1][, i.rn1 := NULL]
    }
    # edges at the rightmost position: target inherits the order of its source
    edt2[e.pos == tail(edge_positions, 1L), rn2 := rn1]
    edt2
    #   N1 rn1 x1 N2 x2 Value e.pos rn2
    #1:  A   1  1  G  2     1     1   1
    #2:  B   2  1  H  2     1     1   2
    #3:  C   3  1  E  2     1     1   3
    #4:  D   4  1  F  2     1     1   4
    #5:  E   3  2  K  3     1     2   3
    #6:  F   4  2  K  3     1     2   4
    #7:  G   1  2  I  3     1     2   1
    #8:  H   2  2  I  3     1     2   2
    

    Extract sort order of nodes from edge table

    # collect the sort order of every node that occurs in the edge table,
    # no matter whether it appears as source (N1) or as target (N2)
    node_order <- edt2[, .(ID = c(N1, N2), rn = c(rn1, rn2))]
    # a node can occur in several edges: keep its first occurrence only
    node_order <- unique(node_order, by = "ID")
    # update node table: attach the sort order rn to each node
    ndt <- node_order[ndt, on = "ID"]
    ndt
    #    ID rn x
    # 1:  A  1 1
    # 2:  B  2 1
    # 3:  C  3 1
    # 4:  D  4 1
    # 5:  E  3 2
    # 6:  F  4 2
    # 7:  G  1 2
    # 8:  H  2 2
    # 9:  I  1 3
    #10:  K  3 3
    

    Create riverplot

    library(riverplot)
    # order the nodes by x position and then by sort order rn,
    # keeping only the ID and x columns
    # makeRiver() type-checks its input, so convert back to data.frame first
    sorted_nodes <- setDF(ndt[order(x, rn), .(ID, x)])
    ex.river <- makeRiver(sorted_nodes, setDF(edt))
    plot(ex.river)
    

    enter image description here