I am now trying to make a program to find the Absolute Euler Pseudoprimes ('AEPSP' in short, not Euler-Jacobi Pseudoprimes), with the definition that n is an AEPSP if
a(n-1)/2 ≡ ±1 (mod n)
for all positive integers a satisfying that the GCD of a and n is 1.
I used a C++ code to generate AEPSPs, which is based on a code to generate Carmichael numbers:
#include <iostream>
#include <cmath>
#include <algorithm>
#include <numeric>
using namespace std;
unsigned int bm(unsigned int a, unsigned int n, unsigned int p){
unsigned long long b;
switch (n) {
case 0:
return 1;
case 1:
return a % p;
default:
b = bm(a,n/2,p);
b = (b*b) % p;
if (n % 2 == 1) b = (b*a) % p;
return b;
}
}
int numc(unsigned int n){
int a, s;
int found = 0;
if (n % 2 == 0) return 0;
s = sqrt(n);
a = 2;
while (a < n) {
if (a > s && !found) {
return 0;
}
if (gcd(a, n) > 1) {
found = 1;
}
else {
if (bm(a, (n-1)/2, n) != 1) {
return 0;
}
}
a++;
}
return 1;
}
int main(void) {
unsigned int n;
for (n = 3; n < 1e9; n += 2){
if (numc(n)) printf("%u\n",n);
}
return 0;
}
Yet, the program is very slow. It generates AEPSPs up to 1.5e6 in 20 minutes. Does anyone have any ideas on optimizing the program?
Any help is most appreciated. :)
I've come up with a different algorithm, based on sieving for primes upfront while simultaneously marking off non-squarefree numbers. I've applied a few optimizations to pack the information into memory a bit tighter, to help with cache-friendliness as well. Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define PRIME_BIT (1UL << 31)
#define SQUARE_BIT (1UL << 30)
#define FACTOR_MASK (SQUARE_BIT - 1)
void sieve(uint64_t size, uint32_t *buffer) {
for (uint64_t i = 3; i * i < size; i += 2) {
if (buffer[i/2] & PRIME_BIT) {
for (uint64_t j = i * i; j < size; j += 2 * i) {
buffer[j/2] &= SQUARE_BIT;
buffer[j/2] |= i;
}
for (uint64_t j = i * i; j < size; j += 2 * i * i) {
buffer[j/2] |= SQUARE_BIT;
}
}
}
}
int main(int argc, char **argv) {
if (argc < 2) {
printf("Usage: prog LIMIT\n");
return 1;
}
uint64_t size = atoi(argv[1]);
uint32_t *buffer = malloc(size * sizeof(uint32_t) / 2);
memset(buffer, 0x80, size * sizeof(uint32_t) / 2);
sieve(size, buffer);
for (uint64_t i = 5; i < size; i += 4) {
if (buffer[i/2] & PRIME_BIT)
continue;
if (buffer[i/2] & SQUARE_BIT)
continue;
uint64_t num = i;
uint64_t factor;
while (num > 1) {
if (buffer[num/2] & PRIME_BIT)
factor = num;
else
factor = buffer[num/2] & FACTOR_MASK;
if ((i / 2) % (factor - 1) != 0) {
break;
}
num /= factor;
}
if (num == 1)
printf("Found pseudo-prime: %ld\n", i);
}
}
This produces the pseudo-primes up to 1.5e6 in about 8ms on my machine, and for 2e9 it takes 1.8sec.
The time complexity of the solution is O(n log n) - the sieve is O(n log n), and for each number we either do constant time checks or do a divisibility test for each of its factors, which there are at most log n. So, the main loop is also O(n log n), resulting in O(n log n) overall.