c++multithreadingruntime-error

Getting error 'std::system_error' when executing on different machine


I tried a cpp program which uses threads on 12th Gen Intel(R) Core(TM) i7-1265U and got no errors, when trying the same program on a container running on Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz I get the following error error image.

#include <iostream>
#include <cstdlib>
#include <thread>
#include <chrono>
#include<vector>

using namespace std;
using namespace std::chrono;

void fillRandom(float *arr, int size) {
    for (int i = 0; i < size; ++i) {
        arr[i] = static_cast<float>(rand()) / RAND_MAX * 10.0;
}
}
void helper(float *c,float*a,float*b,int n,int o,int i,int j)
{
            int t=0,l;
            for (l = 0; l < o; l++) {
                t += a[i * o + l] * b[l * n + j];
            }
            c[i * n + j] = t;

}
void matmul(float *a, float *b, float *c, int m, int n, int o) {
    vector <thread> threads;
    int i, j, l, t;
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {

            threads.emplace_back(&helper,c,a,b,n,o,i,j);
        }
    }
    for(auto& th : threads){
    th.join();
}
}

int main(int argc, char *argv[]) {

    srand(static_cast<unsigned int>(time(nullptr)));

    int m = 256;
    int n = m;
    int repetitions = 10;

    float *a = new float[m * n];
    float *b = new float[m * n];
    float *c = new float[m * n];

    fillRandom(a, m * n);
    fillRandom(b, m * n);


    for (int rep = 0; rep < repetitions; ++rep) {
        auto start = high_resolution_clock::now();
        matmul(a, b, c, m, n, m);
        auto stop = high_resolution_clock::now();
        auto duration = duration_cast<milliseconds>(stop - start);
        cout << "Time taken for " << rep<< "th repetition is " << duration.count() << " milliseconds" << endl;


    }

    delete[] a;
    delete[] b;
    delete[] c;

    return 0;
}

This is a mulit-threaded matrix multiply code, I am new to this, some help would be great, thanks.


Solution

  • You'll need to check the documentation for your os as to what the limit of the number of threads a program can create is. 65k is likely above that limit.

    In general if the threads are all just doing computations (i.e. not waiting for IO operations) then creating more threads than the number of available CPU cores will decrease performance. You can use std::thread::hardware_concurrency to check the number of cores.

    Something like this might work better:

    #include <vector>
    #include <thread>
    using namespace std;
    
    void helper(float *c,float*a,float*b,int n,int o,int row, int rows)
    {
        for (int i = row; i < row + rows; i++) {
            for (int j = 0; j < n; j++) {
                int t=0,l;
                for (l = 0; l < o; l++) {
                    t += a[i * o + l] * b[l * n + j];
                }
                c[i * n + j] = t;
            }
        }
    }
    void matmul(float *a, float *b, float *c, int m, int n, int o) {
        vector <thread> threads;
        int l, t;
        int numThreads = std::thread::hardware_concurrency();
        int rowsPerThread = (m + numThreads -1)/numThreads;
        for (int row = 0; row < m; row += rowsPerThread) { 
            int rows = std::min(rowsPerThread, m-row);
            threads.emplace_back(&helper,c,a,b,n,o,row, rows);
        }
        for(auto& th : threads){
            th.join();
        }
    }