pythonmultithreadinggraph

I trying to generate Erdös-Rényi Random Graphs faster with python using threading, but it didn't work, maybe I am doing something wrong?


This is the code I'm using, it take around 15-16 seconds to print the graph. I was also trying to make the threads uniquely giving for every a name and starting and join using that name, but no matter how many threads I was using the time wasn't changing much. I suspect that this is because of the way my generating function works, but I didn't really can know if this is true, maybe I am just using module in the wrong way

import random
import threading
import math
import time
n=1000
p=0.5
G={}
B=5
def generator(a):
    for i in range((int((a-1)/B)*n),int((a/B))*n):
        for j in range(i+1, n):
            if random.random()>p:
                G[i].append(j)
                G[j].append(i)
for k in range(n):
            G[k]=[]
threads = []
start_time=time.time()


for a in range(1, B+1):
    thread = threading.Thread(target = generator, args=(a, ))
    thread.start()
    threads.append(thread)


for thread in threads:
    thread.join()
print(G)
end_time1=time.time()
print("Time taken1: {}".format(end_time1-start_time))

Solution

  • Use NetworkX, and pass it a randomly-generated adjacency matrix using a constructor like from_scipy_sparse_array:

    import time
    import types
    
    import networkx
    import numpy as np
    import scipy.sparse
    
    
    def generator(
        n: int, p: float = 0.5,
        rand: np.random.Generator | types.ModuleType = np.random,
    ) -> networkx.Graph:
        ij = np.stack(np.triu_indices(n, k=1))
        select = rand.random(size=ij.shape[1]) < p
        edges = ij[:, select]
        sparse = scipy.sparse.coo_array(
            (
                np.ones(shape=edges.shape[1], dtype=bool),  # data
                edges,  # coordinates
            ),
            shape=(n, n),
        )
    
        return networkx.from_scipy_sparse_array(sparse)
    
    
    def demo() -> None:
        rand = np.random.default_rng(seed=0)
    
        start = time.perf_counter()
        G = generator(n=1_000, rand=rand)
        end = time.perf_counter()
    
        print(f'Time taken: {end - start:.4f}')
        print(G)
    
    
    if __name__ == '__main__':
        demo()
    
    Time taken: 0.2521
    Graph with 1000 nodes and 249903 edges
    

    Funny enough, the sparse method outperforms NetworkX's own Erdős-Rényi implementation:

    import time
    import networkx
    
    
    def generator(
        n: int, p: float = 0.5,
        seed: int | None = None,
    ) -> networkx.Graph:
        return networkx.fast_gnp_random_graph(n=n, p=p, seed=seed)
    
    
    def demo() -> None:
        start = time.perf_counter()
        G = generator(n=1_000, seed=0)
        end = time.perf_counter()
    
        print(f'Time taken: {end - start:.4f}')
        print(G)
    
    
    if __name__ == '__main__':
        demo()
    
    Time taken: 0.2688
    Graph with 1000 nodes and 249752 edges
    

    To achieve the same effect as the sparse constructor, but without a Scipy dependency:

    import time
    import networkx
    import numpy as np
    
    
    def generator(
        rand: np.random.Generator, n: int, p: float = 0.5,
    ) -> networkx.Graph:
        ij = np.stack(np.triu_indices(n, k=1), axis=-1)
        select = rand.random(size=ij.shape[0], dtype=np.float32) < p
        G = networkx.empty_graph(n=n)
        G.add_edges_from(ij[select].tolist())
        return G
    
    
    def demo() -> None:
        rand = np.random.default_rng(seed=0)
    
        start = time.perf_counter()
        G = generator(n=1_000, rand=rand)
        end = time.perf_counter()
    
        print(f'Time taken: {end - start:.4f}')
        print(G)
    
    
    if __name__ == '__main__':
        demo()