python path networkx directed-graph group

Finding all directed paths in networkx and saving them as a dataframe


I need to find all directed paths in a network as shown in the sample, and save the directed paths in a new dataframe.

Sample:

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Edge list for the example: row i is the directed edge source[i] -> target[i].
sample_dict = dict(
    target=['A', 'A', 'B', 'B', 'F'],
    source=['B', 'E', 'C', 'D', 'G'],
)
sample_data = pd.DataFrame(sample_dict)

# Build a directed graph so that the orientation of every edge is kept.
G = nx.from_pandas_edgelist(sample_data, 'source', 'target', create_using=nx.DiGraph())

# Compute a spring layout for the nodes and draw the labelled graph.
pos = nx.spring_layout(G)
nx.draw(G, pos=pos, with_labels=True)
plt.show()

enter image description here

I have tried nx.weakly_connected_components, but I don't know how to account for direction.

# Map every node to the comma-joined, sorted members of its weakly
# connected component (edge direction is ignored by this grouping).
d = {}
for component in nx.weakly_connected_components(G):
    label = ','.join(sorted(component))
    d.update(dict.fromkeys(component, label))
attempt_data = pd.DataFrame(d.items())


    0   1
0   A   A,B,C,D,E
1   C   A,B,C,D,E
2   D   A,B,C,D,E
3   E   A,B,C,D,E
4   B   A,B,C,D,E
5   G   F,G
6   F   F,G

Desired output:

# The output being asked for, written as (unit, group) pairs: each node is
# listed once for every directed path it belongs to.
expected_rows = [
    ('A', 'A,B,C'),
    ('A', 'A,B,D'),
    ('A', 'A,E'),
    ('B', 'A,B,C'),
    ('B', 'A,B,D'),
    ('C', 'A,B,C'),
    ('D', 'A,B,D'),
    ('E', 'A,E'),
    ('F', 'F,G'),
    ('G', 'F,G'),
]
desired_dict = {
    'unit': [unit for unit, _ in expected_rows],
    'group': [group for _, group in expected_rows],
}

desired_data = pd.DataFrame(desired_dict)
print(desired_data)

  unit  group
0   A   A,B,C
1   A   A,B,D
2   A   A,E
3   B   A,B,C
4   B   A,B,D
5   C   A,B,C
6   D   A,B,D
7   E   A,E
8   F   F,G
9   G   F,G

Solution

  • I will try to give an "ugly" solution. The steps are commented to explain the code. I have used a lot of for loops, though, so if someone can improve this, I would appreciate it.

    # Leaves of the hierarchy: nodes with no incoming edge and at least one
    # outgoing edge (">= 1" rather than "== 1" so that a leaf pointing to
    # several parents is not skipped).
    sources = [x for x in G.nodes() if G.in_degree(x) == 0 and G.out_degree(x) >= 1]
    # Roots of the hierarchy: nodes with incoming edges but no outgoing edge.
    targets = [x for x in G.nodes() if G.out_degree(x) == 0 and G.in_degree(x) >= 1]

    # Collect every simple directed path from each leaf to each root.
    # all_simple_paths yields nothing for an unconnected pair, and extend()
    # keeps all routes when a pair is connected by more than one path.
    paths = []
    for source_node in sources:
        for target_node in targets:
            paths.extend(nx.all_simple_paths(G, source=source_node, target=target_node))

    # One (unit, group) row per node per path that contains it. Paths run
    # leaf -> root, so each is reversed to list the root first ("A,B,C").
    # Sorting orders the rows by unit, then by group.
    rows = sorted(
        (node, ','.join(reversed(path)))
        for path in paths
        for node in path
    )

    # Built column by column so that a graph without any path yields an
    # empty frame with both columns instead of raising IndexError.
    desired_dict = {'unit': [unit for unit, _ in rows],
                    'group': [group for _, group in rows]}

    desired_data = pd.DataFrame(desired_dict)

    print(desired_data)
    

    The result is as you wished:

     unit  group
    0    A  A,B,C
    1    A  A,B,D
    2    A    A,E
    3    B  A,B,C
    4    B  A,B,D
    5    C  A,B,C
    6    D  A,B,D
    7    E    A,E
    8    F    F,G
    9    G    F,G