cryptographycudaelliptic-curve

CUDA Elliptic curve addtion not working in some specific cases


I am trying to implement elliptic curve points operations in CUDA kernel. For this purpose I use CGBN library: https://github.com/NVlabs/CGBN/tree/master

The use-case is for very specific elliptic curve settings (definition in SageMath):

p = 0x62CE5177412ACA899CF5
r = 0x1CE4AF36EED8DE22B99D

a = 0x39C95E6DDDB1BC45733C
b = 0x1F16D880E89D5A1C0ED1

n = 0x62CE5177407B7258DC31

P_x = 0x315D4B201C208475057D
P_y = 0x035F3DF5AB370252450A

Q_x = 0x0679834CEFB7215DC365
Q_y = 0x4084BC50388C4E6FDFAB

F = GF(p)
E = EllipticCurve(F, [a, b])

P = E(P_x, P_y)
Q = E(Q_x, Q_y)

The implementation I've done so far is working for most of points, but I accidentally found specific pairs of points for which I got wrong results. I looked a few times in code and couldn't find any possible cause of this behavior. Here is the implementation function:

__device__ dev_EC_point add_points(env192_t bn_env, const dev_EC_point &P1, const dev_EC_point &P2, const dev_Parameters &params)
{
    if (cgbn_equals(bn_env, P1.x, P2.x) && cgbn_equals(bn_env, P1.y, P2.y))
    {
        return double_point(bn_env, P1, params);
    }

    env192_t::cgbn_t t2;
    if (cgbn_sub(bn_env, t2, P1.x, P2.x)) // x1 - x2 mod Pmod
    {
        printf("BREAKPOINT 1\n");
        cgbn_sub(bn_env, t2, P2.x, P1.x);
        cgbn_sub(bn_env, t2, params.Pmod, t2);
    }


    cgbn_modular_inverse(bn_env, t2, t2, params.Pmod); // 1/(x1-x2) mod Pmod

    // Montgomery space

    env192_t::cgbn_t x1, y1, x2, y2;

    uint32_t np0;
    np0 = cgbn_bn2mont(bn_env, x1, P1.x, params.Pmod);
    cgbn_bn2mont(bn_env, y1, P1.y, params.Pmod);
    cgbn_bn2mont(bn_env, x2, P2.x, params.Pmod);
    cgbn_bn2mont(bn_env, y2, P2.y, params.Pmod);
    cgbn_bn2mont(bn_env, t2, t2, params.Pmod);

    env192_t::cgbn_t t1;

    if (cgbn_sub(bn_env, t1, y1, y2)) // y0 - y1 mod Pmod
    {
        printf("BREAKPOINT 2\n");
        cgbn_sub(bn_env, t1, y2, y1);
        cgbn_sub(bn_env, t1, params.Pmod, t1);
    }

    env192_t::cgbn_t s, s_sq, x3, y3, t3;

    cgbn_mont_mul(bn_env, s, t1, t2, params.Pmod, np0); // s = (y1-y2)/(x1-x2) mod Pmod // tested

    cgbn_mont_sqr(bn_env, s_sq, s, params.Pmod, np0); // s^2 mod Pmod // tested

    cgbn_add(bn_env, t3, x1, x2); // x1 + x2

    if (cgbn_sub(bn_env, x3, s_sq, t3)) // x3 = s^2 - x1 - x2 // mod Pmod
    {
        printf("BREAKPOINT 4\n");
        cgbn_sub(bn_env, x3, t3, s_sq);
        cgbn_sub(bn_env, x3, params.Pmod, x3);
    }

    if (cgbn_sub(bn_env, t3, x1, x3)) // t3 = x1 - x3 // mod Pmod
    {
        printf("BREAKPOINT 5\n");
        cgbn_sub(bn_env, t3, x3, x1);
        cgbn_sub(bn_env, t3, params.Pmod, t3);
    }

    cgbn_mont_mul(bn_env, t3, t3, s, params.Pmod, np0);

    if (cgbn_sub(bn_env, y3, t3, y1))
    {
        printf("BREAKPOINT 6\n");
        cgbn_sub(bn_env, y3, y1, t3);
        cgbn_sub(bn_env, y3, params.Pmod, y3);
    }

    cgbn_mont2bn(bn_env, x3, x3, params.Pmod, np0);
    cgbn_mont2bn(bn_env, y3, y3, params.Pmod, np0);

    // cgbn_sub(bn_env, x3, s_sq, t1);

    return dev_EC_point{x3, y3};
}

and the kernel for tests:

__global__ void ker_add_points(cgbn_error_report_t *report, EC_point *points, EC_parameters *parameters, int32_t instances)
{
    int32_t instance;
    int32_t points_index;

    instance = (blockIdx.x * blockDim.x + threadIdx.x) / TPI;
    points_index = instance * 2;

    if (instance >= instances)
    {
        return;
    }

    context_t bn_context(cgbn_report_monitor, report, instance); // construct a context
    env192_t bn192_env(bn_context.env<env192_t>());

    dev_EC_point P0, P1;
    dev_Parameters params;

    cgbn_load(bn192_env, P0.x, &(points[points_index].x));
    cgbn_load(bn192_env, P0.y, &(points[points_index].y));

    cgbn_load(bn192_env, P1.x, &(points[points_index + 1].x));
    cgbn_load(bn192_env, P1.y, &(points[points_index + 1].y));

    cgbn_load(bn192_env, params.Pmod, &(parameters->Pmod));
    cgbn_load(bn192_env, params.a, &(parameters->a));

    dev_EC_point result = add_points(bn192_env, P0, P1, params);

    cgbn_store(bn192_env, &(points[points_index].x), result.x);
    cgbn_store(bn192_env, &(points[points_index].y), result.y);
}

The points-pairs for which there is a problem:

A = Q * 1001
B = Q * 20
R = A + B # got wrong answer during tests

A = P * 15
B = Q
R = A + B # got wrong answer during tests 

All tests are run using SageMath and pytest calling a test function via ctypes. Whole project for context: https://github.com/atlomak/thesis


Solution

  • With some more tests and adding print statement, I saw that sometimes the addition result in x1 + x2 step is larger then p. I solved problem by adding additional check if value does surpass p:

        cgbn_add(bn_env, t3, x1, x2); // x1 + x2
        if(cgbn_compare(bn_env, t3, params.Pmod) > 0)
        {
            cgbn_sub(bn_env, t3, t3, params.Pmod);
        }