I'm confused about the performance of my C
program that implements a Segmented Sieve. I originally used only int
as the datatype but to support bigger numbers I decided to switch to unsigned long long
. I expected some performance penalty due to overhead, but when I try the segmented sieve with upper limit 100 billion, the int
approach takes 23 seconds whereas the unsigned long long
doesn't even finish (or at least takes too long for me to wait)
here's the segmented sieve with just int
datatype, with N (upper bound) preset to 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
int size = 5;
int current_slot = 0;
int* primes_arr;
void append(int data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
int* newarr = realloc(primes_arr, (size += 5) * sizeof(int));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
// The following is just a standard approach to segmented sieve, nothing interesting
void simpleSieve(int limit)
{
int p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in simpleSieve\n");
exit(1);
}
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (int i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
// printf("%d ", p);
}
}
}
void segmentedSieve(int n)
{
int limit = (int)floor(sqrt(n)) + 1;
simpleSieve(limit);
int low = limit;
int high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark != NULL)
{
memset(mark, true, sizeof(bool) * (limit + 1));
}
else
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
for (int i = 0; i < current_slot; i++)
{
int lower_lim = (int)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (int j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (int i = low; i < high; i++)
{
if (mark[i - low] == true)
{
// printf("%d ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(int));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double) (t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
and here's the segmented sieve with just unsigned long long
datatype, with N (upper bound) preset to 100B-
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<math.h>
#include<time.h>
unsigned long size = 5, current_slot = 0;
unsigned long long* primes_arr;
void append(unsigned long long data)
{
if ((current_slot + 1) < size)
{
primes_arr[current_slot++] = data;
return;
}
unsigned long long* newarr = realloc(primes_arr, (size += 5l) * sizeof(unsigned long long));
if (newarr != NULL)
{
newarr[current_slot++] = data;
primes_arr = newarr;
}
else
{
printf("\nAn error occured while re-allocating memory\n");
exit(1);
}
}
void simpleSieve(unsigned long limit)
{
unsigned long long p;
if (primes_arr == NULL)
{
printf("\nAn error occured while allocating primes_arr for mark in simpleSieve\n");
exit(1);
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (p = 2; p * p < limit; p++)
{
if (mark[p])
{
for (unsigned long long i = p * 2; i < limit; i += p)
{
mark[i] = false;
}
}
}
for (p = 2; p < limit; p++)
{
if (mark[p])
{
append(p);
//printf("%llu ", p);
}
}
}
void segmentedSieve(unsigned long long n)
{
unsigned long limit = (unsigned long)floor(sqrt(n)) + 1l;
simpleSieve(limit);
unsigned long low = limit;
unsigned long high = 2 * limit;
while (low < n)
{
if (high >= n)
{
high = n;
}
bool* mark = malloc((limit + 1) * sizeof(bool));
if (mark == NULL)
{
printf("\nAn error occured while allocating memory for mark in segmentedSieve\n");
exit(1);
}
memset(mark, true, sizeof(bool) * (limit + 1));
for (unsigned long long i = 0; i < current_slot; i++)
{
unsigned long long lower_lim = (unsigned long)floor(low / primes_arr[i]) * primes_arr[i];
if (lower_lim < low)
{
lower_lim += primes_arr[i];
}
for (unsigned long long j = lower_lim; j < high; j += primes_arr[i])
{
mark[j - low] = false;
}
}
for (unsigned long long i = low; i < high; i++)
{
if (mark[i - low])
{
//printf("%llu ", i);
}
}
low = low + limit;
high = high + limit;
free(mark);
}
}
int main()
{
primes_arr = malloc(size * sizeof(unsigned long long));
clock_t t0 = clock();
segmentedSieve(100000000000);
clock_t t1 = clock();
double time_taken = (double)(t1 - t0) / CLOCKS_PER_SEC;
printf("\nDone! Time taken: %f\n", time_taken);
return 0;
}
I fail to see why this is happening, am I doing something wrong?
Edit: I also realize that int
shouldn't be capable of handling 100 billion anyway and yet the program executes with no errors and even prints the final time report. Meanwhile the unsigned long long
program doesn't even finish in double the time it takes for the int
one.
On the other hand, trying to set the upper bound to 1B on both, actually return pretty similar results. Why?
100 billion decimally is 17 4876 E800H hexadecimally. As it doesn't fit into int
, the 'surplus' most significant bits are cut away, remaining 4876 4800H, which is 1.215.752.192D, so you actually set the limit to just a 100th of what you actually intended when calling segmentedSieve
within main
.
Actually, you have been lucky not to produce a negative number that way.
Be aware, though, that you have entered the land of undefined behaviour due to signed integer overflow. Anything could have happened, including your programme crashing or switching off the sun...
Far more interesting, though, have a look at the following:
segmentedSieve(unsigned long long n)
{
unsigned long low = limit;
while (low < n)
That's critical. On many systems, long
has the same size as int
(with e. g. 64-bit linux being an exception). If that's the case on your system as well, then you produce an en endless loop, as low
will just overflow, too, (solely that it's not undefined behaviour this time) and won't ever be able to reach your 100 billion stored in n
...
You should use unsigned long long
consistently – or maybe even better, uint64_t
from stdint.h
.