x86 sse simd sse4

SSE multiplication 16 x uint8_t


I want to use SSE4 to multiply __m128i values that each hold 16 unsigned 8-bit integers, but I could only find an intrinsic for multiplying 16-bit integers. Is there no such thing as _mm_mult_epi8?


Solution

  • There is no 8-bit multiplication instruction in MMX/SSE/AVX. However, you can emulate an 8-bit multiplication intrinsic using 16-bit multiplication as follows:

    /*
     * Multiply sixteen packed 8-bit integers lane by lane and keep the low
     * 8 bits of each product. Because only the low byte is kept, the result
     * is the same whether the inputs are viewed as signed or unsigned.
     *
     * a, b    - vectors holding sixteen 8-bit lanes each
     * returns - vector whose byte i is (a[i] * b[i]) & 0xFF
     *
     * Only SSE2 intrinsics are used. `static inline` gives the helper
     * internal linkage, so a call the compiler chooses not to inline still
     * has a definition to link against when built as C.
     */
    static inline __m128i _mm_mullo_epi8(__m128i a, __m128i b)
    {
        /*
         * Even-indexed bytes: the low byte of a 16-bit product depends only
         * on the low bytes of the two operands, so multiplying the raw 16-bit
         * lanes already yields the right value in bits 0..7 of each lane.
         */
        __m128i even = _mm_mullo_epi16(a, b);

        /*
         * Odd-indexed bytes: shift them down into the low half of each 16-bit
         * lane (zero-filling the high half) and multiply the same way.
         */
        __m128i odd = _mm_mullo_epi16(_mm_srli_epi16(a, 8),
                                      _mm_srli_epi16(b, 8));

        /*
         * Recombine: odd products go back to the high byte of each lane; the
         * even products are masked so their high-byte garbage is discarded.
         */
        return _mm_or_si128(_mm_slli_epi16(odd, 8),
                            _mm_and_si128(even, _mm_set1_epi16(0x00FF)));
    }