assemblybit-manipulationx86-64saturation-arithmeticswar

Add two vectors (uint64_t type) with saturation for each int8_t element


I was recently faced with a given problem:

There are 8 elements in the vector, each is represented by int8_t.

Implement an algorithm in x86_64 that will add two vectors (uint64_t type).

Adding elements should be done with saturation arithmetic taken into account.

E.g.:

80 + 60 = 127

(−40) + (−100) = −128

The biggest challenge turns out to be the restrictions imposed:

I can't think of any solution that fits these restrictions. Could anyone give me some hints? Examples in C are welcome.

I can use only "standard", transfer, arithmetic, logical instructions and standard registers:


Solution

  • I wrote it in C++ like this:

    #include <cstdint>
    
    uint64_t add(uint64_t a, uint64_t b) {
        uint64_t asigns = a & 0x8080808080808080L;
        uint64_t bsigns = b & 0x8080808080808080L;
        uint64_t sum = (a^asigns) + (b^bsigns);
        // fix up 8 bit wrapped sums
        sum ^= asigns ^ bsigns;
        uint64_t sumsigns = sum & 0x8080808080808080L;
        // we saturate high when a and b were positive, but the result is negative
        uint64_t sat = sumsigns & ~(asigns|bsigns);
        sum |= (sat>>7)*127;
        sum &= ~sat;
        // we saturate negative when a and b were negative, but the result is positive
        sat = (asigns&bsigns) & ~sumsigns;
        sum &= ~((sat>>7)*127);
        sum |= sat;
        return sum;
    }
    

    Then I went over to https://godbolt.org/ to see what various compilers generate. clang-16 gives 33 instructions:

    add(unsigned long, unsigned long):
            movabs  rdx, -9187201950435737472
            mov     rax, rdi
            and     rax, rdx
            mov     rcx, rsi
            and     rcx, rdx
            movabs  r8, 9187201950435737471
            mov     r9, rdi
            and     r9, r8
            and     r8, rsi
            add     r8, r9
            xor     rax, rcx
            xor     rax, r8
            or      rsi, rdi
            not     rsi
            and     rdx, rsi
            and     rdx, r8
            mov     rsi, rdx
            shr     rsi, 7
            mov     r8, rdx
            sub     r8, rsi
            or      r8, rax
            xor     r8, rdx
            not     rax
            and     rcx, rdi
            and     rcx, rax
            mov     rdx, rcx
            shr     rdx, 7
            mov     rax, rcx
            sub     rax, rdx
            not     rax
            and     rax, r8
            or      rax, rcx
            ret
    

    You can try the various other options.