I want to multiply all the elements of a vector. However, the following code snippet
// Attempted parallel product of the elements of sum1 (the version reported as giving a wrong result).
// NOTE(review): `doulbe` below looks like a typo for `double`.
long doulbe sum = parallel_reduce(
// NOTE(review): the range is declared over long double although the loop below only uses its
// bounds as size_t indices; blocked_range<size_t> is probably what was intended — confirm.
blocked_range<long double>(0, sum1.size()), 1.0L /* Identity for Multiplication */,
// Range body: multiplies the elements at the subrange's indices into the incoming partial product.
[&](tbb::blocked_range<long double> r, long double running_total)
{
// r.begin() is converted to size_t here, while the loop bound compares i against r.end() directly.
for (size_t i = r.begin(); i < r.end(); i++)
running_total *= sum1[i];
return running_total;
},
// Join step: combines two partial products by multiplying them.
std::multiplies<long double>());
produces the wrong result, whereas `accumulate` gives the correct result:
// Sequential reference: folds sum1 with multiplication, starting from the initial value 1.0L.
long double sum2 = accumulate(sum1.begin(), sum1.end(), 1.0L, std::multiplies<long double>());
What am I missing?
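The issue with your `parallel_reduce` code is not the reduction (join) step. The `parallel_reduce` algorithm does require you to specify how the partial results from different threads should be combined, but `std::multiplies<long double>()` already multiplies the partial products together correctly. The problem is the range type: `blocked_range<long double>` makes the range bounds floating-point values, so when TBB splits a range in half the midpoint can be fractional (e.g. 2.5). The loop then converts `r.begin()` to `size_t` (truncating 2.5 to 2), while the neighbouring subrange still runs until `i < 2.5`, so the element at index 2 is multiplied twice. Iterate over indices with `blocked_range<size_t>` instead: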
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

#include <tbb/tbb.h>
long double parallel_product(const std::vector<long double>& sum1) {
return tbb::parallel_reduce(
tbb::blocked_range<size_t>(0, sum1.size()), 1.0L /* Identity for Multiplication */,
[&](const tbb::blocked_range<size_t>& r, long double running_total) {
for (size_t i = r.begin(); i < r.end(); i++) {
running_total *= sum1[i];
}
return running_total;
},
std::multiplies<long double>());
}
// Demo driver: multiplies the elements of {1, 2, 3, 4} once with std::accumulate
// and once with parallel_product, then prints both results.
int main() {
    const std::vector<long double> sum1{1.0L, 2.0L, 3.0L, 4.0L};

    const long double sequential =
        std::accumulate(sum1.cbegin(), sum1.cend(), 1.0L, std::multiplies<long double>());
    const long double parallel = parallel_product(sum1);

    std::cout << "Accumulate result: " << sequential << std::endl;
    std::cout << "Parallel reduce result: " << parallel << std::endl;
    return 0;
}