c++pointersvectorstrassen

why vector use less memory than pointers in this code?


I wrote paralell program based on a Strassen multiplication algorithm using pointers. this program return the result of multiplication of two matrices that are the same size. when the size is 256 , program fill about 1 GB of ram, and when it is 512 ram total\y become full and my windows doesn't work then I must restart.

I replace whole pointers with vectors then incredibly Ram usage decreased!.for 1024 size , just 80 MB of ram used.

I know a little about vector that is bound statically at first then if we need more space during runtime its bound dynamically.

Why pointers needed more space than vectors ?

this is my first code :

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;

int ** matrix_1;
int ** matrix_2;

#define number_thread:4;

void show(string name, int n, int **show)
{
    cout << " matrix " << name << " :" << endl;
    for (int i = 0; i < n; i++)
    {

        for (int j = 0; j < n; j++)
            cout << show[i][j] << " ";
        cout << endl;
    }
}



int ** strassen(int n, int **matrix_a, int ** matrix_b)
{

    int ** A11;
    int ** A12;
    int ** A21;
    int ** A22;

    int ** B11;
    int ** B12;
    int ** B21;
    int ** B22;

    int ** result;


    int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
    A11 = new int*[n / 2];
    A12 = new int*[n / 2];
    A21 = new int*[n / 2];
    A22 = new int*[n / 2];

    B11 = new int*[n / 2];
    B12 = new int*[n / 2];
    B21 = new int*[n / 2];
    B22 = new int*[n / 2];


    result = new int *[n];

    m1 = new int*[n / 2];
    m2 = new int*[n / 2];
    m3 = new int*[n / 2];
    m4 = new int*[n / 2];
    m5 = new int*[n / 2];
    m6 = new int*[n / 2];
    m7 = new int*[n / 2];
    m8 = new int*[n / 2];

    cilk_for(int i = 0; i < n / 2; i++)
    {
        //cout << " value i : " << i << endl;
        A11[i] = new int[n / 2];
        A12[i] = new int[n / 2];
        A21[i] = new int[n / 2];
        A22[i] = new int[n / 2];

        B11[i] = new int[n / 2];
        B12[i] = new int[n / 2];
        B21[i] = new int[n / 2];
        B22[i] = new int[n / 2];

        m1[i] = new int[n / 2];
        m2[i] = new int[n / 2];
        m3[i] = new int[n / 2];
        m4[i] = new int[n / 2];
        m5[i] = new int[n / 2];
        m6[i] = new int[n / 2];
        m7[i] = new int[n / 2];
        m8[i] = new int[n / 2];

    }

    cilk_for(int i = 0; i < n; i++) // matrix result
        result[i] = new int[n];


    if (n == 2)
    {
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    }
    //  for (int i = 0; i < n;i++)

    cilk_for(int i = 0; i < (n / 2); i++)
    {
        for (int j = 0; j < (n / 2); j++)
        {
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        }
    }
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Run By eight_thread
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    cilk_for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    {
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    }

    /*cout << "after adding hello \n";
    show("m1", n / 2, m1);
    show("m3", n / 2, m3);
    show("m5", n / 2, m5);
    show("m7", n / 2, m7);*/



    cilk_for(int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            if (i < n / 2 && j < n / 2)
            {
                result[i][j] = m1[i][j];
            }
            else if (i < n / 2 && j >= n / 2)
            {
                result[i][j] = m3[i][j - n / 2];
            }
            else if (i >= n / 2 && j < n / 2)
            {
                result[i][j] = m5[i - n / 2][j];
            }
            else if (i >= n / 2 && j >= n / 2)
            {
                result[i][j] = m7[i - n / 2][j - n / 2];

            }
        }
    }

    /*
    cilk_for(int i = 0; i < n / 2; i++)
    {
    for (int j = 0; j < n / 2; j++)
    {
    delete A11[i][j];
    delete A12[i][j];
    delete A21[i][j];
    delete A22[i][j];
    delete B11[i][j];
    delete B12[i][j];
    delete B21[i][j];
    delete B22[i][j];


    delete m1[i][j];
    delete m2[i][j];
    delete m3[i][j];
    delete m4[i][j];
    delete m5[i][j];
    delete m6[i][j];
    delete m7[i][j];
    delete m8[i][j];*/






    /*  }
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
        }*/


    delete[] A11;
    delete[] A12;
    delete[] A21;
    delete[] A22;
    delete[] B11;
    delete[] B12;
    delete[] B21;
    delete[] B22;


    delete[] m1;
    delete[] m2;
    delete[] m3;
    delete[] m4;
    delete[] m5;
    delete[] m6;
    delete[] m7;
    delete[] m8;

    return result;
}



int main()
{

    int size;

    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);


    __cilkrts_set_param("nworkers", "4");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    matrix_1 = new int*[size];
    matrix_2 = new int*[size];

    if (size % 2 == 0)
    {

        //instialize matrix1
        //cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        {
            matrix_1[i] = new int[size];
            for (int j = 0; j < size; j++)

            {
                matrix_1[i][j] = rand() % 3;
                //cin >> matrix_1[i][j];
                //cout << matrix_1[i][j] << " ";

            }
            //cout << endl;

        }
        //instialize matrix2
        //cout << "matrix2_is :\n";
        for (int i = 0; i < size; i++)
        {
            matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            {

                matrix_2[i][j] = rand() % 3;
                //cout << matrix_2[i][j]<<" ";
                //cin >> matrix_2[i][j];

            }
            //  cout << endl;

        }
        clock_t begin = clock();


        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        //answer:
        /*  for (int i = 0; i < size; i++)
            {
            for (int j = 0; j < size; j++)

            {
            cout<< matrix_2[i][j]<<" ";

            }
            cout << endl;

            }*/


    }
    else
        cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
    //  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
    //  PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;

}

I replace whole pointers array with vectors :

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;
vector<vector<int> > matrix_1, matrix_2;

//int matrix_1;
//int ** matrix_2;

#define number_thread:4;

void show(string name ,int n, int **show)
{
    cout << " matrix " << name<<" :" << endl;
    for (int i = 0; i < n; i++)
    {

        for (int j = 0; j < n; j++)
            cout << show[i][j] << " ";
        cout << endl;
    }
}


vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
{

    vector<vector<int>> A11;
    vector<vector<int>> A12;
    vector<vector<int>> A21;
    vector<vector<int>> A22;

    vector<vector<int>> B11;
    vector<vector<int>> B12;
    vector<vector<int>> B21;
    vector<vector<int>> B22;

    vector<vector<int>> result;


    vector<int> help;


    vector<vector<int>> m1, m2, m3,  m4, m5,  m6,  m7,  m8;




    help.clear();
    for (int j = 0; j < n / 2; j++)
    {
        help.push_back(2);
    }


    for(int i = 0; i < n / 2; i++)
    {
        A11.push_back(help);
        A12.push_back(help);
        A21.push_back(help);
        A22.push_back(help);

        B11.push_back(help);
        B12.push_back(help);
        B21.push_back(help);
        B22.push_back(help);


        m1.push_back(help);
        m2.push_back(help);
        m3.push_back(help);
        m4.push_back(help);

        m5.push_back(help);
        m6.push_back(help);
        m7.push_back(help);
        m8.push_back(help);
    }


    for (int j = 0; j < n / 2; j++)
        help.push_back(2);
    for(int i = 0; i < n; i++)
    {
        result.push_back(help);

    }
    if (n == 2)
    {
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    }
    //  for (int i = 0; i < n;i++)

    for(int i = 0; i < (n / 2); i++)
    {
        for(int j = 0; j <( n / 2); j++)
        {
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        }
    }
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Run By eight_thread
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show
("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    {
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    }

        /*cout << "after adding hello \n";
        show("m1", n / 2, m1);
        show("m3", n / 2, m3);
        show("m5", n / 2, m5);
        show("m7", n / 2, m7);*/



    for(int i = 0; i < n ; i++)
    {
        for(int j = 0; j < n ; j++)
        {
            if (i < n / 2 && j < n / 2)
            {
                result[i][j] = m1[i][j];
            }
            else if (i < n / 2 && j >= n / 2)
            {
                result[i][j] = m3[i][j - n / 2];
            }
            else if (i >= n / 2 && j < n / 2)
            {
                result[i][j] = m5[i - n / 2][j];
            }
            else if (i >= n / 2 && j >= n / 2)
            {
                result[i][j] = m7[i - n / 2][j - n / 2];

            }
        }
    }



    /*
    cilk_for(int i = 0; i < n / 2; i++)
    {
        for (int j = 0; j < n / 2; j++)
        {
            delete A11[i][j];
            delete A12[i][j];
            delete A21[i][j];
            delete A22[i][j];
            delete B11[i][j];
            delete B12[i][j];
            delete B21[i][j];
            delete B22[i][j];


            delete m1[i][j];
            delete m2[i][j];
            delete m3[i][j];
            delete m4[i][j];
            delete m5[i][j];
            delete m6[i][j];
            delete m7[i][j];
            delete m8[i][j];*/






    /*  }
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
    }*/


/*  for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)

        {
            cout << result[i][j] << " ";

        }
        cout << endl;

    }*/

    return result;
}



int main()
{

    int size;

    freopen("in.txt","r",stdin);
    freopen("out.txt", "w", stdout);


    __cilkrts_set_param("nworkers", "1");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    vector<int> inner;
    if (size % 2 == 0)
    {

        //instialize matrix1
        cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        {
            inner.clear();

            for (int j = 0; j < size; j++)

            {
                inner.push_back(rand()%3);
                //cin >> matrix_1[i][j];
                cout << inner[j]<<" ";

            }
            cout << endl;

            matrix_1.push_back(inner);
        }
        //instialize matrix2
        cout << "matrix2_is :\n";
        inner.clear();
        for (int i = 0; i < size; i++)
        {
            inner.clear();
            //matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            {

            inner.push_back(rand()%3);
            cout << inner[j]<<" ";
                //cin >> matrix_2[i][j];

            }
            cout << endl;
            matrix_2.push_back(inner);
        }
        clock_t begin = clock();


        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        //answer:
        cout << "answerrr :" << endl;
        for (int i = 0; i < size; i++)
        {
            for (int j = 0; j < size; j++)

            {
                cout<< matrix_2[i][j]<<" ";

            }
            cout << endl;

        }


    }

    else
    cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
//  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
//  PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;

}

Solution

  • I am going to guess this is an allocation issue. Allocation from the OS seems to be quite time consuming from what I have seen.

    Just a guess but maybe the std::vector default allocator is grabbing a much larger contiguous block of memory from the OS and is drawing from that to satisfy smaller vector allocations?

    This answer may provide some insight:

    https://stackoverflow.com/a/29659791/3807729

    I managed to reduce the time taken to run a test program simply by allocating, then deallocating a large std::vector before running the timing operations.

    I am speculating that the C++ runtime system (in some implementations) may hold on to memory it has received from the OS even after it has been deallocated because grabbing small chunks from the OS each time is much more expensive.