cfloating-pointtype-conversiondoubleunsigned-long-long-int

How to convert double to unsigned long long under rounding mode != FE_TOWARDZERO?


How to convert double to unsigned long long under rounding mode != FE_TOWARDZERO?

Extra: why there is no ullrint function?


Solution

  • The standard C99 math functions trunc(), ceil(), floor(), and rint() (with an appropriately set rounding mode) can be used to first round the double value to the next integer value using the desired rounding mode, before converting to unsigned long long with a cast. Note that sufficiently large double values (those with magnitude >= 253) are guaranteed to be integers, so there are no double-rounding issues in this approach.

    Alternatively (e.g. when using a tool chain that only supports an older ISO-C standard that does not offer these standard math functions), one can easily perform the conversion with a little straightforward bit manipulation. The following example code demonstrates both approaches. It assumes that uint64_t and unsigned long long are identical under the hood, and that double maps to IEEE-754 binary64.

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <string.h>
    #include <fenv.h>
    #include <math.h>
    
    #define RN (1)
    #define RZ (2)
    #define RU (3)
    #define RD (4)
    #define TEST_RND_MODE (RD)
    
    uint64_t double_as_uint64 (double a)
    {
        uint64_t r;
        memcpy (&r, &a, sizeof r);
        return r;
    }
    
    uint64_t double_to_uint64 (double a, int rnd_mode)
    {
        const uint64_t uint64_nan = 0x8000000000000000ULL;
        const uint64_t uint64_msb = 0x8000000000000000ULL;
        const int dbl_expo_bias = 1023;
        const int dbl_expo_bits = 11;
        const int dbl_expo_mask = ((1 << dbl_expo_bits) - 1);
        const int dbl_mant_bits = 52; // stored
        uint64_t t = 0ULL;
        uint64_t ai = double_as_uint64 (a);
        uint64_t r;
        int shift;
    
        /* special case handling: season to taste */
        if (isnan (a)) return uint64_nan;
        if (a >= 0x1.0p64) return 0ULL;
        if (a <= 0.0) return 0ULL;
    
        shift = (int)(dbl_expo_bias + 63 - ((ai >> dbl_mant_bits) & dbl_expo_mask));
        r = ((ai << dbl_expo_bits) | uint64_msb);
        if (shift >= 64) {
            t = r >> (shift > 64);
            r = 0;
        } else if (shift) {
            t = r << (64 - shift);
            r = r >> shift;
        }
        if ((rnd_mode == FE_TONEAREST) && (t >= uint64_msb)) {
            r += (t == uint64_msb) ? (r & 1) : 1;
        } else if ((rnd_mode == FE_UPWARD) && t) {
            r++;
        }
        return r;
    }
    
    double uint64_as_double (uint64_t a)
    {
        double r;
        memcpy (&r, &a, sizeof r);
        return r;
    }
    
    static uint64_t kiss64_x = 1234567890987654321ULL;
    static uint64_t kiss64_c = 123456123456123456ULL;
    static uint64_t kiss64_y = 362436362436362436ULL;
    static uint64_t kiss64_z = 1066149217761810ULL;
    static uint64_t kiss64_t;
    #define MWC64  (kiss64_t = (kiss64_x << 58) + kiss64_c, \
                    kiss64_c = (kiss64_x >> 6), kiss64_x += kiss64_t, \
                    kiss64_c += (kiss64_x < kiss64_t), kiss64_x)
    #define XSH64  (kiss64_y ^= (kiss64_y << 13), kiss64_y ^= (kiss64_y >> 17), \
                    kiss64_y ^= (kiss64_y << 43))
    #define CNG64  (kiss64_z = 6906969069ULL * kiss64_z + 1234567ULL)
    #define KISS64 (MWC64 + XSH64 + CNG64)
    
    int main (void)
    {
        double x;
        uint64_t res, ref;
    
        do {
            x = fabs (uint64_as_double (KISS64));
    
    #if TEST_RND_MODE == RZ
            res = double_to_uint64 (x, FE_TOWARDZERO);
            ref = (uint64_t)trunc(x);
    #elif TEST_RND_MODE == RN
            res = double_to_uint64 (x, FE_TONEAREST);
            ref = (uint64_t)rint(x); // assumes current rounding mode is FE_TONEAREST!
    #elif TEST_RND_MODE == RU 
            res = double_to_uint64 (x, FE_UPWARD);
            ref = (uint64_t)ceil(x);
    #elif TEST_RND_MODE == RD
            res = double_to_uint64 (x, FE_DOWNWARD);
            ref = (uint64_t)floor(x);
    #else
    #error unsupported TEST_RND_MODE
    #endif
            if (res != ref) {
                printf ("err @ % 22.13a: res=%016llx  ref=%016llx\n", x, res, ref);
                return EXIT_FAILURE;
            }
        } while (1);
        return EXIT_SUCCESS;
    }