I am trying to implement order-independent transparency (OIT) on my own. It is almost finished except for one thing: as you can see in the picture below, OIT combined with MSAA gives wrong results. I think it is because of the samples, because at each triangle edge (and only at triangle edges) there are 4 samples.
Create the lists
// Pass 1 - append the shaded fragment to the per-pixel linked list.
//
// Buffer layout as used by this shader (byte offsets):
//   0                                  : running byte count of allocated node storage
//   4 + 4 * pixelIndex                 : head pointer of that pixel's list
//   4 + 4 * nScreenWidth*nScreenHeight : start of the node pool, 44 bytes per node
// Node layout: +0 next pointer, +4 position.xyz, +16 normal.xyz, +28 color.rgba
RWByteAddressBuffer tRWFragmentList : register(u1);

void main(PS_INPUT input)
{
    // Integer pixel coordinate of this fragment.
    // NOTE(review): the division by input.Pos.w is kept from the original;
    // confirm it is intended for the projection in use.
    uint2 pixelCoord = (uint2)((input.Pos.xy - float2(0.5, 0.5)) / input.Pos.w);
    uint pixelIndex = nScreenWidth * pixelCoord.y + pixelCoord.x;

    // Payload that gets stored in the node.
    float3 fragNormal = normalize((float3)input.Normal);
    float3 fragPosition = (float3)input.Pos;
    float4 fragColor = createphong(input);

    // Reserve 44 bytes in the node pool; the previous counter value is
    // this fragment's offset inside the pool.
    uint poolOffset = 0;
    tRWFragmentList.InterlockedAdd(0, 44, poolOffset);

    // Drop the fragment once the pool offset reaches the hard-coded limit.
    if (poolOffset >= 1000 * 1000 * 500)
    {
        return;
    }

    // Absolute byte address of the new node (skips counter and head table).
    uint nodeAddress = 4 + nScreenWidth * nScreenHeight * 4 + poolOffset;

    tRWFragmentList.Store3(nodeAddress + 4, asuint(fragPosition));
    tRWFragmentList.Store3(nodeAddress + 16, asuint(fragNormal));
    tRWFragmentList.Store4(nodeAddress + 28, asuint(fragColor));

    // Make the new node the list head and link the old head behind it.
    uint previousHead = 0;
    tRWFragmentList.InterlockedExchange(pixelIndex * 4 + 4, nodeAddress, previousHead);
    tRWFragmentList.Store(nodeAddress, previousHead);
}
Sort the lists
// Pass 2 - sort this pixel's fragment list in place so that the node with
// the largest value at byte offset 12 comes first (selection sort that
// relinks nodes rather than moving their payload).
// NOTE(review): assumes only one invocation works on a given pixel's list
// at a time - confirm for the MSAA case.
RWByteAddressBuffer tRWFragmentList : register(u1);

float4 main(PS_INPUT input) : SV_Target
{
    // NOTE(review): the division by input.Pos.w is kept from the original;
    // confirm it is intended for the projection in use.
    uint2 pixelCoord = (uint2)((input.Pos.xy - float2(0.5, 0.5)) / input.Pos.w);

    // Byte address of the slot holding this pixel's head pointer.
    uint headSlot = 4 + (nScreenWidth * pixelCoord.y + pixelCoord.x) * 4;

    // Empty list: nothing to sort.
    if (tRWFragmentList.Load(headSlot) == 0)
    {
        return float4(1, 0, 1, 1);
    }

    // "Link" variables hold the address of a next-pointer slot: either the
    // head slot or a node address (a node's next pointer sits at offset 0).
    uint sortedTailLink = headSlot;
    do
    {
        // First node of the still-unsorted remainder.
        uint firstUnsorted = tRWFragmentList.Load(sortedTailLink);

        // Scan the remainder for the node with the largest key, remembering
        // the link that points at it so it can be unlinked afterwards.
        float largestKey = asfloat(tRWFragmentList.Load(firstUnsorted + 12));
        uint largestLink = sortedTailLink;

        uint scanLink = sortedTailLink;
        do
        {
            uint candidate = tRWFragmentList.Load(scanLink);
            float candidateKey = asfloat(tRWFragmentList.Load(candidate + 12));
            if (candidateKey > largestKey)
            {
                largestKey = candidateKey;
                largestLink = scanLink;
            }
            scanLink = candidate;
        }
        while (tRWFragmentList.Load(scanLink) > 0);

        // Move the winner to the front of the remainder unless it is
        // already there.
        if (largestLink != sortedTailLink)
        {
            uint largestNode = tRWFragmentList.Load(largestLink);
            uint afterLargest = tRWFragmentList.Load(largestNode);

            tRWFragmentList.Store(sortedTailLink, largestNode);
            tRWFragmentList.Store(largestLink, afterLargest);
            tRWFragmentList.Store(largestNode, firstUnsorted);
        }

        // The node just placed becomes the end of the sorted prefix.
        sortedTailLink = tRWFragmentList.Load(sortedTailLink);
    }
    while (tRWFragmentList.Load(sortedTailLink) > 0);

    // This pass always outputs the same constant color.
    return float4(1, 0, 1, 1);
}
Render the finished picture
// Pass 3 - walk this pixel's fragment list in stored order, blend every
// fragment over a grey background, then clear the list head so the buffer
// is empty again.
// Returns the blended color with alpha forced to 1.
RWByteAddressBuffer tRWFragmentList : register(u1);

float4 main(PS_INPUT input) : SV_Target
{
    // NOTE(review): the division by input.Pos.w is kept from the original;
    // confirm it is intended for the projection in use.
    float2 position = (input.Pos.xy - float2(0.5, 0.5)) / input.Pos.w;
    uint nXPosition = position.x;
    uint nYPosition = position.y;

    // Byte address of the slot holding this pixel's head pointer.
    uint headSlot = (nScreenWidth * nYPosition + nXPosition) * 4 + 4;

    // Background color the fragments are composited onto.
    float3 Color = float3(0.5, 0.5, 0.5);

    // Follow the next pointers (offset 0 of each node) until the 0 terminator.
    uint node = tRWFragmentList.Load(headSlot);
    while (node != 0)
    {
        // RGBA is read from byte offsets 28..43 of the node.
        float4 NewColor = asfloat(tRWFragmentList.Load4(node + 28));

        // Standard "over" blend of the fragment onto the running color.
        Color = NewColor.a * NewColor.rgb + (1 - NewColor.a) * Color.rgb;

        node = tRWFragmentList.Load(node);
    }

    // Mark this pixel's list as empty.
    tRWFragmentList.Store(headSlot, 0);

    // Zero the word at byte 0 exactly once, from pixel (0,0). The original
    // tested `nYPosition` for non-zero, so every pixel of column 0 except
    // (0,0) performed the store.
    // NOTE(review): presumably byte 0 is the node allocation counter of the
    // list-building pass - confirm.
    if (nXPosition == 0 && nYPosition == 0)
    {
        tRWFragmentList.Store(0, 0);
    }

    return float4(Color, 1);
}
My idea is to write the sample number into the list, and at the end, when I render the final picture, compare the list nodes: if they are too close together, I want to check the sample number and calculate the average color. But I don't know how to get the actual sample number...
BTW: does anyone have a better idea for fixing this bug? It does not need to be a fast calculation; I don't render in real time.
You have to use `SV_Coverage` to read, in the pixel shader, a mask of the samples touched by the fragment.
With it you resolve the transparency (and MSAA in one go) by accumulating into N values (N as in MSAA Nx) according to the coverage, then average and output.
If you want to output in the msaa surface instead prior to the resolve, you have to use a compute shader to be able to do the accumulation once then write the N values separately.
I would use compute shaders for everything except the actual mesh render; they are more convenient than pixel shaders for that kind of processing.