winapi nvidia direct2d wic hdr

Maximum float value in 10-bit image in WIC


I'm trying to convert an HDR image, which I load as a float array, to 10-bit-per-channel DWORDs with WIC.

The pixel format of the loaded file is GUID_WICPixelFormat128bppPRGBAFloat, so I get an array of 4 floats per pixel (one float per channel).

When I try to convert these to 10 bit as follows:

// One pixel packed into a single 32-bit word: three 10-bit colour channels
// plus a 2-bit alpha, declared in the order b, g, r, a.
// NOTE(review): bit-field placement is implementation-defined; the layout the
// encoder expects (b in the lowest bits, a in the highest) holds on compilers
// that allocate bit-fields starting from the least significant bit — confirm
// for the target toolchain.
struct RGBX
{
    unsigned int b : 10;
    unsigned int g : 10;
    unsigned int r : 10;
    // Unsigned like the colour fields: a signed 2-bit field can only hold
    // -2..1, so storing 3 (fully opaque) would read back as -1.
    unsigned int a : 2;
} rgbx;

(which is the format requested by the NVIDIA encoding library for 10-bit rgb),

then I assume I have to divide each of the floats by 1024.0f in order to get them inside the 10 bits of a DWORD.

However, I notice that some of the floats are > 1, which means that their range is not [0,1] as it happens when the image is 8 bit.

What would their range be? How to store a floating point color into a 10-bits integer?

I'm trying to use the NVidia's HDR encoder which requires an ARGB10 like the above structure.

How is the 10 bit information of a color stored as a floating point number?

Btw I tried to convert with WIC but conversion from GUID_WICPixelFormat128bppPRGBAFloat to GUID_WICPixelFormat32bppR10G10B10A2 fails.

// Converts a 128bpp premultiplied-RGBA float image to packed 10:10:10:2 pixels
// using a WIC format converter.
//
// f   - source pixels, 4 floats per pixel, rows tightly packed (wi * 16 bytes each).
// wi  - image width in pixels.
// he  - image height in pixels.
// out - receives wi * he converted DWORDs on success; left untouched on failure.
//
// Returns S_OK on success, E_INVALIDARG for a null/empty/oversized input, or the
// HRESULT of the first WIC call that fails.
HRESULT ConvertFloatTo10(const float* f, int wi, int he, std::vector<DWORD>& out)
{
    if (!f || wi <= 0 || he <= 0)
        return E_INVALIDARG;

    const UINT width = static_cast<UINT>(wi);
    const UINT height = static_cast<UINT>(he);

    // Size the source buffer in 64 bits first: wi * he * 16 overflows a 32-bit
    // integer for large images, and WIC takes the size as a UINT.
    const unsigned long long srcBytes = 16ull * width * height;
    if (srcBytes > 0xFFFFFFFFull)
        return E_INVALIDARG;

    CComPtr<IWICBitmap> b;
    HRESULT hr = wbfact->CreateBitmapFromMemory(width, height, GUID_WICPixelFormat128bppPRGBAFloat,
        width * 16, static_cast<UINT>(srcBytes),
        reinterpret_cast<BYTE*>(const_cast<float*>(f)), &b);
    if (FAILED(hr))
        return hr;

    CComPtr<IWICFormatConverter> wf;
    hr = wbfact->CreateFormatConverter(&wf);
    if (FAILED(hr))
        return hr;

    // This call was observed to fail with 0x88982f50 ("The component cannot be
    // found.") for this source/destination format pair; the error is now
    // reported to the caller instead of being dropped.
    hr = wf->Initialize(b, GUID_WICPixelFormat32bppR10G10B10A2, WICBitmapDitherTypeNone,
        nullptr, 0.0, WICBitmapPaletteTypeCustom);
    if (FAILED(hr))
        return hr;

    // Pull the converted pixels into a temporary so `out` is only replaced
    // once the whole conversion has succeeded.
    std::vector<DWORD> converted(static_cast<size_t>(width) * height);
    hr = wf->CopyPixels(nullptr, width * static_cast<UINT>(sizeof(DWORD)),
        static_cast<UINT>(converted.size() * sizeof(DWORD)),
        reinterpret_cast<BYTE*>(converted.data()));
    if (FAILED(hr))
        return hr;

    out.swap(converted);
    return S_OK;
}

Edit: I found a paper (https://hal.archives-ouvertes.fr/hal-01704278/document), is this relevant to this question?


Solution

  • Floating-point color content that is greater than the 0..1 range is High Dynamic Range (HDR) content. If you trivially convert it to 10:10:10:2 UNORM then you are using 'clipping' for values over 1. This doesn't give good results.

    SDR 10:10:10 or 8:8:8

    You should instead use tone-mapping, which converts the HDR signal to an SDR signal (Standard Dynamic Range, a.k.a. 0..1) before, or as part of, the conversion to 10:10:10:2.

    There are many different approaches to tone-mapping, but a common 'generic' solution is the Reinhard tone-mapping operator. Here's an implementation using DirectXTex.

    // Tone-maps `image` into a freshly allocated `timage` in two passes:
    //   1) scan every pixel to find the largest luminance value,
    //   2) rescale each pixel's colour channels using that maximum.
    // This is an excerpt: `hr`, `image` and GetErrorDesc come from the enclosing
    // function, which returns 1 on any failure.

    // new (std::nothrow) yields a null pointer instead of throwing, hence the
    // explicit check below.
    std::unique_ptr<ScratchImage> timage(new (std::nothrow) ScratchImage);
    if (!timage)
    {
        wprintf(L"\nERROR: Memory allocation failed\n");
        return 1;
    }
    
    // Compute max luminosity across all images
    XMVECTOR maxLum = XMVectorZero();
    hr = EvaluateImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
        // Presumably invoked once per scanline with `w` pixels; the row index
        // `y` is not needed here.
        [&](const XMVECTOR* pixels, size_t w, size_t y)
        {
            UNREFERENCED_PARAMETER(y);
    
            for (size_t j = 0; j < w; ++j)
            {
                // Luminance weights for the first three components; the fourth
                // component is weighted 0.
                static const XMVECTORF32 s_luminance = { { { 0.3f, 0.59f, 0.11f, 0.f } } };
    
                XMVECTOR v = *pixels++;
    
                v = XMVector3Dot(v, s_luminance);
    
                // Running maximum over every pixel visited so far.
                maxLum = XMVectorMax(v, maxLum);
            }
        });
    if (FAILED(hr))
    {
        wprintf(L" FAILED [tonemap maxlum] (%08X%ls)\n", static_cast<unsigned int>(hr), GetErrorDesc(hr));
        return 1;
    }
    
    // From here on maxLum holds the *square* of the maximum luminance.
    // NOTE(review): if no pixel has positive luminance, maxLum is still zero
    // and the XMVectorDivide(value, maxLum) below divides by zero — confirm
    // whether such input needs a guard.
    maxLum = XMVectorMultiply(maxLum, maxLum);
    
    hr = TransformImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
        [&](XMVECTOR* outPixels, const XMVECTOR* inPixels, size_t w, size_t y)
        {
            UNREFERENCED_PARAMETER(y);
    
            for (size_t j = 0; j < w; ++j)
            {
                XMVECTOR value = inPixels[j];
    
                // Per component: scale = (1 + value / maxLum^2) / (1 + value),
                // and the tone-mapped result is value * scale.
                const XMVECTOR scale = XMVectorDivide(
                    XMVectorAdd(g_XMOne, XMVectorDivide(value, maxLum)),
                    XMVectorAdd(g_XMOne, value));
                const XMVECTOR nvalue = XMVectorMultiply(value, scale);
    
                // Presumably takes x/y/z from nvalue and keeps the original w
                // (alpha) from value — confirm against g_XMSelect1110.
                value = XMVectorSelect(value, nvalue, g_XMSelect1110);
    
                outPixels[j] = value;
            }
        }, *timage);
    if (FAILED(hr))
    {
        wprintf(L" FAILED [tonemap apply] (%08X%ls)\n", static_cast<unsigned int>(hr), GetErrorDesc(hr));
        return 1;
    }
    

    HDR10

    UPDATE: If you are trying to convert HDR floating-point content to an "HDR10" signal, then you need to do:

    // Colour-primaries conversion matrices into Rec.2020. Only c_from709to2020
    // is used by the transform shown in this answer (as the matrix argument of
    // XMVector3Transform). In each matrix the first three columns sum to ~1.0
    // and the last row/column is the identity.

    // HDTV to UHDTV (Rec.709 color primaries into Rec.2020)
    const XMMATRIX c_from709to2020 =
    {
        0.6274040f, 0.0690970f, 0.0163916f, 0.f,
        0.3292820f, 0.9195400f, 0.0880132f, 0.f,
        0.0433136f, 0.0113612f, 0.8955950f, 0.f,
        0.f,        0.f,        0.f,        1.f
    };
    
    // DCI-P3-D65 https://en.wikipedia.org/wiki/DCI-P3 to UHDTV (DCI-P3-D65 color primaries into Rec.2020)
    const XMMATRIX c_fromP3D65to2020 =
    {
        0.753845f, 0.0457456f, -0.00121055f, 0.f,
        0.198593f, 0.941777f,   0.0176041f,  0.f,
        0.047562f, 0.0124772f,  0.983607f,   0.f,
        0.f,       0.f,         0.f,         1.f
    };
    
    // Custom Rec.709 into Rec.2020
    // The first column is identical to c_from709to2020's first column; the
    // second and third columns are identical to c_fromP3D65to2020's.
    const XMMATRIX c_fromExpanded709to2020 = 
    {
        0.6274040f, 0.0457456f, -0.00121055f, 0.f,
        0.3292820f, 0.941777f,   0.0176041f,  0.f,
        0.0433136f, 0.0124772f,  0.983607f,   0.f,
        0.f,        0.f,         0.f,         1.f
    };
    
    // Encodes a linear value with the ST.2084 curve:
    //     ((c1 + c2 * |x|^m1) / (1 + c3 * |x|^m1)) ^ m2
    //
    // normalizedLinearValue - linear input; only its magnitude is used, so a
    //                         negative input encodes the same as its positive
    //                         counterpart. An input of 1.0 encodes to 1.0.
    // Returns the encoded value. It is deliberately not clamped to [0..1], so
    // callers can still perform operations on scene values higher than
    // 10,000 nits.
    inline float LinearToST2084(float normalizedLinearValue)
    {
        constexpr float m1 = 0.1593017578f;
        constexpr float m2 = 78.84375f;
        constexpr float c1 = 0.8359375f;
        constexpr float c2 = 18.8515625f;
        constexpr float c3 = 18.6875f;

        // std::fabs / std::pow are used explicitly: an unqualified abs() can
        // resolve to the C integer abs(int) and silently truncate the input.
        // The |x|^m1 term appears in both numerator and denominator, so it is
        // computed once.
        const float powered = std::pow(std::fabs(normalizedLinearValue), m1);

        return std::pow((c1 + c2 * powered) / (1.0f + c3 * powered), m2);
    }
    
    // Converts `image` into `timage` as an HDR10-style signal: Rec.709 ->
    // Rec.2020 primaries, scale by paper white, then ST.2084 encode x/y/z.
    // This is an excerpt: `hr`, `image`, `timage` and c_MaxNitsFor2084 are
    // defined outside it.
    // NOTE(review): unlike the tone-mapping snippet earlier in this answer, `hr` is not
    // checked after this TransformImage call — add a FAILED(hr) test when
    // integrating.

    // You can adjust this up to 10000.f
    float paperWhiteNits = 200.f;
    
    hr = TransformImage(image->GetImages(), image->GetImageCount(), image->GetMetadata(),
        [&](XMVECTOR* outPixels, const XMVECTOR* inPixels, size_t w, size_t y)
        {
            UNREFERENCED_PARAMETER(y);
    
            // Same paper-white value in every component, built once per call.
            const XMVECTOR paperWhite = XMVectorReplicate(paperWhiteNits);
    
            for (size_t j = 0; j < w; ++j)
            {
                XMVECTOR value = inPixels[j];
    
                XMVECTOR nvalue = XMVector3Transform(value, c_from709to2020);
                // Some people prefer the look of using c_fromP3D65to2020
                // or c_fromExpanded709to2020 instead.
    
                // Convert to ST.2084
                // Scale by paper white, then divide by c_MaxNitsFor2084 to get
                // the normalized input LinearToST2084 expects.
                // NOTE(review): c_MaxNitsFor2084 is not shown here; presumably
                // 10000 nits replicated into a vector — confirm.
                nvalue = XMVectorDivide(XMVectorMultiply(nvalue, paperWhite), c_MaxNitsFor2084);
    
                // LinearToST2084 is scalar, so round-trip through an XMFLOAT4A
                // and encode x, y and z individually; tmp.w is not encoded.
                XMFLOAT4A tmp;
                XMStoreFloat4A(&tmp, nvalue);
    
                tmp.x = LinearToST2084(tmp.x);
                tmp.y = LinearToST2084(tmp.y);
                tmp.z = LinearToST2084(tmp.z);
    
                nvalue = XMLoadFloat4A(&tmp);
    
                // Presumably takes x/y/z from nvalue and keeps the original w
                // (alpha) from value — confirm against g_XMSelect1110.
                value = XMVectorSelect(value, nvalue, g_XMSelect1110);
    
                outPixels[j] = value;
            }
        }, *timage);
    

    You should really take a look at texconv.

    Reference

    Reinhard et al., "Photographic tone reproduction for digital images", ACM Transactions on Graphics, Volume 21, Issue 3 (July 2002). ACM DL.