I'm trying to unpack SNorm16 values in an HLSL compute shader. Since SNorm16x4 is 8 bytes in total, and the Load/Store functions can only read/write 4-byte words, I'm trying to read the two 4-byte values that together hold the packed 8-byte value, unpack them into 4 values, work with the result, and then pack it back and store it as one packed 8-byte value.
The code is:
// Decodes two SNORM16 values packed in one 32-bit word into floats in [-1, 1].
//   v       : low 16 bits hold the first component, high 16 bits the second.
//   returns : float2(first, second).
float2 UnpackFromSNorm16x2(uint v)
{
    // SNORM16 lanes are two's-complement signed integers, not biased unsigned
    // values. Move each lane to the top of a 32-bit int and arithmetic-shift
    // it back down so the sign bit is extended.
    int2 lanes = int2(asint(v << 16) >> 16, asint(v) >> 16);
    // -32768 and -32767 both represent -1.0, so clamp the slightly-too-small
    // result produced by -32768 / 32767.
    return max(float2(lanes) * (1.0f / 32767.0f), -1.0f);
}
// Converts a float to a signed 16-bit normalized integer value.
//   v       : value to encode; anything outside [-1, 1] is clamped.
//   returns : rounded integer in [-32767, 32767].
int FloatToSNorm16(float v)
{
    // According to the D3D10 rules, the value -1.0f has two SNORM16
    // representations: 0x8000 and 0x8001. That lets the conversion scale by
    // 32767 for both signs instead of using 32768 for negative values.
    // Clamp before scaling so out-of-range input saturates at +/-32767.
    float clamped = clamp(v, -1.0f, 1.0f);
    // Round to nearest by adding half a step away from zero, then truncate.
    float rounded = clamped * 32767.0f + (clamped >= 0.0f ? 0.5f : -0.5f);
    return int(rounded);
}
// Encodes two floats as SNORM16 values packed into one 32-bit word.
//   v       : components to encode; each is converted with FloatToSNorm16.
//   returns : v.x in the low 16 bits, v.y in the high 16 bits.
uint PackToSNorm16x2(float2 v)
{
    int intX = FloatToSNorm16(v.x);
    int intY = FloatToSNorm16(v.y);
    // SNORM16 stores the two's-complement bits of the signed integer, so no
    // bias is added: keep the low 16 bits of each value as they are.
    uint x = asuint(intX) & 0x0000FFFF;
    // Shifting left by 16 discards the upper (sign-extension) bits of intY.
    uint y = asuint(intY) << 16;
    return x | y;
}
// Read two 32-bit words from `vertices` at the tangent's byte offset.
uint2 inputTangentUInt = asuint(vertices.Load2(baseOffset + tangentOffset));
// Unpack each word into two floats, giving four components in total.
float4 qTangentUnpacked = float4(UnpackFromSNorm16x2(inputTangentUInt.x), UnpackFromSNorm16x2(inputTangentUInt.y));
//Do some work with qTangentUnpacked
// NOTE(review): PackTwoSNORM16 is not defined in the code shown here; the
// packing helper defined above is named PackToSNorm16x2 - confirm which
// function is intended.
uint2 qTangentPacked = uint2(PackTwoSNORM16(qTangentUnpacked.xy), PackTwoSNORM16(qTangentUnpacked.zw));
// Write both words back to the same offset they were read from.
vertices.Store2(baseOffset + tangentOffset, asuint(qTangentPacked));
But the final result is wrong; it looks like some data is lost. What am I doing wrong?
There is a collection of HLSL routines for doing these kinds of conversions called D3DX_DXGIFormatConvert.inl. It is documented on Microsoft Learn, and used to ship in the legacy DirectX SDK.
// Aliases that let the D3DX conversion routines, written with C++-style type
// names, compile as HLSL.
typedef int INT;
// UINT is used by the R16G16 SNORM pack/unpack routines below.
typedef uint UINT;
typedef int2 XMINT2;
typedef float2 XMFLOAT2;
// No inline specifier is emitted on the HLSL side.
#define D3DX11INLINE
// Truncation maps to the HLSL trunc intrinsic.
#define D3DX_Truncate_FLOAT(_V) trunc(_V)
// hlsl_precise maps to the HLSL `precise` storage modifier.
#define hlsl_precise precise
// Converts a signed integer to a normalized float by dividing by _Scale.
//   _V      : signed integer value to convert.
//   _Scale  : divisor applied to _V.
//   returns : _V / _Scale, never less than -1.
D3DX11INLINE FLOAT D3DX_INT_to_FLOAT(INT _V,
                                     FLOAT _Scale)
{
    // Two's-complement integers reach one step further on the negative side
    // than on the positive side, so the quotient can land just below -1.
    // Taking the max with -1 keeps the result inside [-1, 1].
    return max((FLOAT)_V / _Scale, -1.0f);
}
// Converts a float to an integer by scaling and rounding to nearest.
//   _V      : value to convert.
//   _Scale  : multiplier applied to _V before rounding.
//   returns : the scaled value rounded half away from zero.
D3DX11INLINE INT D3DX_FLOAT_to_INT(FLOAT _V,
                                   FLOAT _Scale)
{
    // Half a step in the direction of the input's sign, so that the
    // truncation below acts as round-to-nearest.
    FLOAT roundingBias = (_V >= 0) ? 0.5f : -0.5f;
    return (INT)D3DX_Truncate_FLOAT(_V * _Scale + roundingBias);
}
// Unpacks a 32-bit word holding two signed 16-bit lanes into two floats.
//   packedInput : low 16 bits feed .x, high 16 bits feed .y.
//   returns     : each lane converted with D3DX_INT_to_FLOAT at scale 32767.
D3DX11INLINE XMFLOAT2 D3DX_R16G16_SNORM_to_FLOAT2(UINT packedInput)
{
    // Low lane: move it to the top of the word, then arithmetic-shift it back
    // down as a signed value so the sign bit is extended.
    const INT lowLane = (INT)(packedInput << 16) >> 16;
    // High lane: already at the top; isolate it and arithmetic-shift down.
    const INT highLane = (INT)(packedInput & 0xffff0000) >> 16;

    hlsl_precise XMFLOAT2 unpackedOutput;
    unpackedOutput.x = D3DX_INT_to_FLOAT(lowLane, 32767);
    unpackedOutput.y = D3DX_INT_to_FLOAT(highLane, 32767);
    return unpackedOutput;
}
// Packs two floats into a 32-bit word as two signed 16-bit lanes.
//   unpackedInput : .x goes to the low 16 bits, .y to the high 16 bits.
//   returns       : the packed word.
// NOTE(review): D3DX_SaturateSigned_FLOAT is not defined in the code shown
// here; presumably it clamps to [-1, 1] - confirm against the full header.
D3DX11INLINE UINT D3DX_FLOAT2_to_R16G16_SNORM(hlsl_precise XMFLOAT2 unpackedInput)
{
    // Low lane: convert at scale 32767 and keep only the low 16 bits so a
    // negative value's sign extension cannot spill into the high lane.
    const INT lowLane = D3DX_FLOAT_to_INT(D3DX_SaturateSigned_FLOAT(unpackedInput.x), 32767) & 0x0000ffff;
    // High lane: the left shift moves it into place and drops the upper bits.
    const INT highLane = D3DX_FLOAT_to_INT(D3DX_SaturateSigned_FLOAT(unpackedInput.y), 32767) << 16;

    UINT packedOutput = lowLane | highLane;
    return packedOutput;
}
The latest version is on GitHub.
See this blog post for more information.