c++algorithmstlgraph-algorithmkosaraju-algorithm

Non recursive Kosaraju's two pass algorithm implementation taking forever to execute on a large data set


(eg):

1 2

2 3

3 1

3 4

5 4

Download link for the Large graph test case zip file

Link to my program file

Code follows:

//Macro definitions and Global variables

#define N 875714
#define all(a) (a).begin(), (a).end()
#define tr(c,i) for(typeof((c).begin()) i = (c).begin(); i != (c).end(); i++)

vi v(N), ft, size;

//Non recursive DFS algorithm

void DFS(vvi g, int s, int flag)
{
stack<int> stk;
stk.push(s);
v[s] = 1;

int jumpOut, count;
vi::iterator i;

if(flag == 2)
     count = 1;

while(!stk.empty())
{
i = g[stk.top()].begin();
jumpOut = 0;

for(; i != g[stk.top()].end(); i++)
{
    if(v[*i] != 1)
    {
        stk.push(*i);
        v[*i] = 1;

        if(flag == 2) //Count the SCC size
            count++;

        jumpOut = 1; //Jump to the while loop's beginning
        break;
    }
 }

 if(flag == 1 && jumpOut == 0) //Record the finishing time order of vertices
    ft.push_back(stk.top());

 if(jumpOut == 0)
      stk.pop();
}

if(flag == 2)
    size.push_back(count); //Store the SCC size
}

// The 2 pass Kosaraju algorithm

void kosaraju(vvi g, vvi gr)
{
cout<<"\nInside pass 1\n";

for(int i = N - 1; i >= 0; i--)
    if(v[i] != 1)
        DFS(gr, i, 1);

cout<<"\nPass 1 completed\n";

fill(all(v), 0);

cout<<"\nInside pass 2\n";

for(int i = N - 1; i >= 0; i--)
    if(v[ ft[i] ] != 1)
        DFS(g, ft[i], 2);

cout<<"\nPass 2 completed\n";
}

.

int main()
{
vvi g(N), gr(N);
ifstream file("/home/tauseef/Desktop/DAA/SCC.txt");
int first, second;
string line;

while(getline(file,line,'\n')) //Reading from file
{
    stringstream ss(line);
    ss >> first;
    ss >> second;
    if(first == second) //Eliminating self loops
        continue;

    g[first-1].push_back(second-1); //Creating G & Grev
    gr[second-1].push_back(first-1);
}

cout<<"\nfile read successfully\n";

kosaraju(g, gr);

cout<<"\nFinishing order is: ";
tr(ft, j)
    cout<<*j+1<<" ";
cout<<"\n";

sort(size.rbegin(), size.rend()); //Sorting the SCC sizes in descending order

cout<<"\nThe largest 5 SCCs are: ";
tr(size, j)
    cout<<*j<<" ";
cout<<"\n";

file.close();
}

Solution

  • There are several improvements that you can apply:
    1- cin is not as fast scanf for large inputs: Because your input file is huge you better use scanf to read your data.
    2- It is not a good idea to pass large data to functions by value: You have two huge graphs in your code that you pass them to functions by value. It takes a lot of time because every time you are making a copy of the data.
    3- There is no need to use iterator for traversing a vector: Because you are using a vector and you have random access to it via [] operator there is no need to use iterator to access data.
    4- Your DFS is not efficient: This is the most important one. Every time the program go to the beginning of the while and check the adjacency list of the element on top of the stack you start from the beginning and check elements. This make the algorithm very inefficient because you are checking some things over and over again. You can simply store how many of the children you have checked and when you go back to this element you start from the next element instead of starting from start.

    #include<iostream>
    #include<vector>
    #include<stack>
    #include<algorithm>
    #include<fstream>
    #include<string>
    #include<sstream>
    using namespace std;
    
    typedef vector<int> vi;
    typedef vector<vi> vvi;
    
    #define N 875714
    #define sz(a) int((a).size())
    #define all(a) (a).begin(), (a).end()
    #define tr(c,i) for(typeof((c).begin()) i = (c).begin(); i != (c).end(); i++)
    
    vi v(N), ft, size;
    vi childsVisited(N);
    
    void DFS(vvi &g, int s, int flag)
    {
        stack<int> stk;
        stk.push(s);
        v[s] = 1;
    
        int jumpOut, count;
    
        if(flag == 2)
            count = 1;
        int counter = 0;
        while(!stk.empty())
        {
            jumpOut = 0;
            int cur = stk.top();
            for ( ;childsVisited[cur] < g[cur].size(); ++childsVisited[cur] )
            //for ( int i=0; i< g[cur].size(); ++i )
            //for(; i != g[stk.top()].end(); i++)
            {
                int i = childsVisited[cur];
                int next = g[cur][i];
                if(v[next] != 1)
                {
                    stk.push(next);
                    v[next] = 1;
                    if(flag == 2) //Count the SCC size
                        count++;
    
                    jumpOut = 1; //Jump to the while loop's beginning
                    break;
                }
            }
    
            if(flag == 1 && jumpOut == 0) //Record the finishing time order of vertices
                ft.push_back(stk.top());
    
            if(jumpOut == 0)
                stk.pop();
        }
    
        if(flag == 2)
            size.push_back(count); //Store the SCC size
    }
    
    void kosaraju(vvi &g, vvi &gr)
    {
        cout<<"\nInside pass 1\n";
    
        for(int i = N - 1; i >= 0; i--)
            if(v[i] != 1)
                DFS(gr, i, 1);
    
        cout<<"\nPass 1 completed\n";
    
        fill(all(v), 0);
        fill(all(childsVisited), 0);
    
        cout<<"\nInside pass 2\n";
    
        for(int i = N - 1; i >= 0; i--)
            if(v[ ft[i] ] != 1)
                DFS(g, ft[i], 2);
    
        cout<<"\nPass 2 completed\n";
    }
    
    int main()
    {
        freopen("input.txt","r",stdin);
        vvi g(N), gr(N);
        //ifstream file("/home/tauseef/Desktop/DAA/SCC.txt");
        int first, second;
        //string line;
        unsigned long int cnt = 0;
    
        //while(getline(file,line,'\n')) //Reading from file
        //{
            //stringstream ss(line);
            //ss >> first;
            //ss >> second;
            //if(first == second) //Eliminating self loops
                //continue;
        for ( int i = 0; i < 5105043; ++i ){
            int first, second;
            scanf("%d %d",&first,&second);
            g[first-1].push_back(second-1); //Creating G & Grev
            gr[second-1].push_back(first-1);
        }
            //cnt++;
        //}
    
        cout<<"\nfile read successfully\n";
    
    
        kosaraju(g, gr);
    
        cout<<"\nFinishing order is: ";
    
        sort(size.rbegin(), size.rend()); //Sorting the SCC sizes in descending order
    
        cout<<"\nThe largest 5 SCCs are: ";
    
    }