c++ floating-point ieee

How can I output the bit patterns of infinity and NaN in C++ (IEEE standard)?


I'm reading Computer Systems: A Programmer's Perspective, where I found the definition of the special values and their corresponding bit patterns.

bit pattern of Infinity and NaN

Now I want to output their bits using C++. I used their macros to output the bits, which is obviously incorrect, because the macros are defined as integers:

// Category codes that fpclassify() returns; they are plain integer constants,
// not floating-point values holding a NaN or an infinity.
#define FP_NAN      0x0100
#define FP_NORMAL   0x0400
// Bitwise OR of the two codes above, i.e. 0x0500.
#define FP_INFINITE (FP_NAN | FP_NORMAL)

What should I do to correctly output the bits shown in the image above? And why does the compiler define those macros as integers rather than as the IEEE-standard values?

Below is my code:

#include <iostream>
#include <cmath>
#include <bitset>

using namespace std;

// Overlays a float and an int on the same storage so that the float's bits
// can be read back as an integer.
// NOTE: reading a union member other than the one last written is undefined
// behavior in standard C++ (many compilers allow it as an extension);
// std::memcpy into an integer is the portable way to obtain the bits.
union U {
    float f;  // value whose representation is inspected
    int i;    // the same bytes viewed as an integer
};

// Prints the 32-bit patterns of the floats stored in u1 and u2.
// FP_NAN and FP_INFINITE are integer classification codes, so the assignments
// below convert those integers to float; no NaN or infinity is ever stored.
int main() {

    U u1, u2;

    // With the definitions quoted in the question these store 256.0f (0x0100)
    // and 1280.0f (0x0500) respectively.
    u1.f = FP_NAN;
    u2.f = FP_INFINITE;

    // bitset<32> renders the integer view of each float, most significant bit first.
    cout << bitset<32>(u1.i) << endl;
    cout << bitset<32>(u2.i) << endl;

    return 0;
}

output:

01000011100000000000000000000000
01000100101000000000000000000000

My computer environment:


Solution

  • I wrote a quick-and-dirty double bit-wise output program a while back. You could modify it to work for float.

    It has ANSI escape sequences in it, which might not be suitable for your environment.

    The key part is just using a byte memory pointer and examining the bit state directly, rather than trying to get std::bitset to play nice.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <iomanip>
    #include <iostream>
    #include <limits>
    #include <sstream>
    #include <string>
    
    using std::cout;
    using std::fpclassify;
    using std::memcpy;
    using std::nan;
    using std::numeric_limits;
    using std::reverse;
    using std::setw;
    using std::size_t;
    using std::string;
    using std::stringstream;
    using std::uint32_t;
    using std::uint64_t;
    
    namespace {
    
    // Returns the first sizeof(uint32_t) bytes of d's object representation,
    // copied verbatim into a uint32_t. On a little-endian host these bytes hold
    // the low-order half of the 64-bit pattern.
    uint32_t low32_from(double d) {
        uint32_t word = 0;
        // memcpy reads the leading bytes of d without any aliasing concerns.
        memcpy(&word, &d, sizeof word);
        return word;
    }
    
    // Returns the four bytes that follow the first four in d's object
    // representation, copied verbatim into a uint32_t. On a little-endian host
    // these bytes hold the sign, the exponent and the top of the fraction.
    uint32_t high32_from(double d) {
        auto const* bytes = reinterpret_cast<unsigned char const*>(&d);
        uint32_t word = 0;
        // Skip one 32-bit word (4 bytes) and copy the next one.
        memcpy(&word, bytes + sizeof word, sizeof word);
        return word;
    }
    
    // Formats the four bytes of value, in the order they sit in memory, as
    // two-digit uppercase hex numbers separated by single spaces
    // (for example "00 00 F0 3F").
    string hexstr(uint32_t value) {
        char const* const digits = "0123456789ABCDEF";
        unsigned char bytes[sizeof value];
        memcpy(bytes, &value, sizeof bytes);

        stringstream out;
        bool first = true;
        for (unsigned char byte : bytes) {
            // Separator goes between bytes only, never before the first one.
            if (!first) out << " ";
            first = false;
            out << digits[(byte >> 4) & 0xF] << digits[byte & 0xF];
        }
        return out.str();
    }
    
    // Renders the lowest len bits of v as '0'/'1' characters, most significant
    // bit first, with an apostrophe between every group of four bits counted
    // from the least significant end (e.g. bits(1023, 11) -> "011'1111'1111").
    string bits(uint64_t v, size_t len) {
        string lsb_first;
        // Emit from the least significant bit upwards; v is shifted one step per
        // iteration, so positions beyond the width of v simply read as zero.
        for (size_t pos = 0; pos < len; ++pos, v >>= 1) {
            if (pos != 0 && pos % 4 == 0) {
                lsb_first.push_back('\'');
            }
            lsb_first.push_back((v & 1) ? '1' : '0');
        }
        // Flip into conventional most-significant-first reading order.
        return string(lsb_first.rbegin(), lsb_first.rend());
    }
    
    // Describes d as one line of ANSI-colored text containing, in order:
    //   - the sign bit followed by "(+)" or "(-)",
    //   - the 11 stored exponent bits and, in parentheses, that value minus the
    //     bias of 1023,
    //   - the 52 fraction bits, prefixed with 'i' when an implied leading 1
    //     applies (normal numbers) or '.' otherwise,
    //   - the fpclassify() category name,
    //   - the value itself as formatted by operator<<.
    //
    // The fields are extracted from a uint64_t copy of the bit pattern instead of
    // by indexing individual bytes, so the result no longer depends on the host
    // storing the most significant byte last (assuming doubles and 64-bit
    // integers share one byte order, as on mainstream platforms).
    string doublebits(double d) {
        static_assert(sizeof(double) == sizeof(uint64_t),
                      "doublebits expects a 64-bit double");

        uint64_t raw = 0;
        memcpy(&raw, &d, sizeof raw);

        // binary64 layout: 1 sign bit | 11 exponent bits | 52 fraction bits.
        const uint64_t s = raw >> 63;
        const uint64_t e = (raw >> 52) & 0x7FFU;
        const uint64_t f = raw & ((uint64_t{1} << 52) - 1U);
        const auto dx = fpclassify(d);

        stringstream ss;
        ss << "sign:\033[0;32m" << bits(s, 1) << "\033[0m ";
        ss << (s ? "(-) " : "(+) ");

        ss << "exp:\033[0;33m" << bits(e, 11) << "\033[0m ";
        ss << "(" << setw(5) << (static_cast<int>(e) - 1023) << ") ";

        ss << "frac:";

        // 'i' for implied 1 bit, '.' for not applicable (so things align correctly).
        if (dx == FP_NORMAL) ss << "\033[0;34mi";
        else ss << "\033[0;37m.\033[34m";

        ss << bits(f, 52) << "\033[0m";

        // Any category other than the five below gets no label.
        if (dx == FP_INFINITE) ss << " \033[35mInfinite\033[0m";
        else if (dx == FP_NAN) ss << " \033[35mNot-A-Number\033[0m";
        else if (dx == FP_NORMAL) ss << " \033[35mNormal\033[0m";
        else if (dx == FP_SUBNORMAL) ss << " \033[35mDenormalized\033[0m";
        else if (dx == FP_ZERO) ss << " \033[35mZero\033[0m";

        ss << " " << d;

        return ss.str();
    }
    
    } // anon
    
    // Demo driver: dumps the two 32-bit halves of 1111.2222 as hex bytes, then
    // prints the field breakdown of a set of representative doubles, and finally
    // halves 1.0 repeatedly, printing each value until it reaches zero.
    int main() {
        const double sample = 1111.2222;
        cout << hexstr(low32_from(sample)) << "\n";
        cout << hexstr(high32_from(sample)) << "\n";

        const double inf = numeric_limits<double>::infinity();
        const double showcase[] = {
            sample, 1.0, -1.0, +0.0, -0.0, inf, -inf, nan(""),
        };
        for (double value : showcase) {
            cout << doublebits(value) << "\n";
        }

        // Repeated halving walks down through the normal range into the
        // subnormals; the loop stops once the value becomes 0.0.
        for (double x = 1.0; x > 0.0; x = x / 2.0) {
            cout << doublebits(x) << "\n";
        }
    }