c++openglglslpost-processingbloom

Why I have to generate mipmap every frame?


I'm implementing the bloom effect using a compute shader.

If I generate mipmaps right after the creation of the textures, bloom does not work. If I generate mipmaps in a loop where the bloom is calculated it works.

I don't want mipmaps to be generated every frame.

My question is why the bloom does not work if I generate mipmaps only when the textures are created. Why I have to generate mips every frame?

To simplify things my viewport size is always the same: 1600x900. C++/glsl code might be kinda long (but it's right after the pseudocode anyways), maybe you can spot the problem only using the pseudocode:

colorImage = Create(screenW, screenH);
colorImage.GenerateMipmaps();

prefilteredImage = Create(bloomW, bloomH);
prefilteredImage.GenerateMipmaps();

downsampledStagingImage = Create(bloomW, bloomH);
downsampledStagingImage.GenerateMipmaps();

downsampledImage = Create(bloomW, bloomH);
downsampledImage.GenerateMipmaps();

upsampledgImage = Create(bloomW, bloomH);
upsampledImage.GenerateMipmaps();

while(true) {
    colorImage = RenderScene();

    PrefilterBloomPass();
    DownsamplingBloomPass();
    UpsamplingBloomPass();

    CompositeColorImageWithBloom();
}

result - no bloom: enter image description here

mips generated every frame pseudocode:

//textures created same as before

while(true) {
    colorImage = RenderScene();

    prefilteredImage.GenerateMipmaps();
    PrefilterBloomPass();

    downsampledStagingImage.GenerateMipmaps();
    downsampledImage.GenerateMipmaps();
    DownsamplingBloomPass();

    upsampledImage.GenerateMipmaps();
    UpsamplingBloomPass();

    CompositeColorImageWithBloom();
}

result - bloom works:

enter image description here

Here's the c++/glsl implementation I've got a class that handles it:

I don't think the error is related to this class, but maybe you'll spot something

struct OpenGLTexture
{
    struct CreateInfo
    {
        uint32_t w = 0;
        uint32_t h = 0;
        //TextureParameters is not related to this question so I'll just skip the implementation
        TextureParameters parameters;
    };

    OpenGLTexture(const CreateInfo& info)
    {
        const uint32_t format = ConvertFormatToOpenGL(info.parameters.format);
        const uint32_t internalFormat = ConvertInternalFormatToOpenGL(info.parameters.format);
        const uint32_t type = ConvertTextureFormatToOpenGLDataType(info.parameters.format);
        //... miplevel, format, wrap variables

        glCreateTextures(target, 1, &glHandle);

        glTextureStorage2D(glHandle, miplevel, internalFormat, w, h);

        if (wrap != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_S, wrap);
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_T, wrap);
        }

        if (filterMag != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MAG_FILTER, filterMag);
        }

        if (filterMin != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MIN_FILTER, filterMin);
        }

        if (miplevel) {
            glTextureParameteri(glHandle, GL_TEXTURE_BASE_LEVEL, 0);
            glTextureParameteri(glHandle, GL_TEXTURE_MAX_LEVEL, miplevel);
            glGenerateTextureMipmap(glHandle);
        }
    }

    void GenerateMipmap() const
    {
        glGenerateTextureMipmap(texture);
    }

    void Bind(uint32_t bindingPoint) 
    {
        glBindTextureUnit(bindingPoint, texture);
    }

    void BindImage(uint32_t bindingPoint, uint32_t level, uint32_t access, uint32_t format)
    {
        glBindImageTexture(bindingPoint, texture, level, GL_FALSE, 0, access, format);
    }

    GLuint glHandle = 0;
};

my bloom shader uses the sampling method described in the COD Advanced Warfare article, I guess? It looks like this

#version 460 core

layout(binding = 0, rgba32f) restrict writeonly uniform image2D o_image;
layout(binding = 1) uniform sampler2D u_colorTexture;
layout(binding = 2) uniform sampler2D u_bloomTexture;

const float Epsilon = 1.0e-4;

layout(location = 0) uniform float threshold;
layout(location = 1) uniform float knee;
layout(location = 2) uniform float lod;
layout(location = 3) uniform int mode;

#define MODE_PREFILTER      0
#define MODE_DOWNSAMPLE     1
#define MODE_UPSAMPLE_FIRST 2
#define MODE_UPSAMPLE       3

vec4 QuadraticThreshold(vec4 color, float threshold, vec3 curve)
{
    // Maximum pixel brightness
    float brightness = max(max(color.r, color.g), color.b);
    // Quadratic curve
    float rq = clamp(brightness - curve.x, 0.0, curve.y);
    rq = (rq * rq) * curve.z;
    color *= max(rq, brightness - threshold) / max(brightness, Epsilon);
    return color;
}

vec4 Prefilter(vec4 color, vec2 uv)
{
    vec4 params = { threshold, threshold - knee, knee * 2.0f, 0.25f / knee };

    float clampValue = 20.0f;
    color = min(vec4(clampValue), color);
    color = QuadraticThreshold(color, params.x, params.yzw);
    return color;
}

vec3 DownsampleBox13(sampler2D tex, float lod, vec2 uv, vec2 texelSize)
{
    // Center
    vec3 A = textureLod(tex, uv, lod).rgb;

    texelSize *= 0.5f; // Sample from center of texels

    // Inner box
    vec3 B = textureLod(tex, uv + texelSize * vec2(-1.0f, -1.0f), lod).rgb;
    vec3 C = textureLod(tex, uv + texelSize * vec2(-1.0f, 1.0f), lod).rgb;
    vec3 D = textureLod(tex, uv + texelSize * vec2(1.0f, 1.0f), lod).rgb;
    vec3 E = textureLod(tex, uv + texelSize * vec2(1.0f, -1.0f), lod).rgb;

    // Outer box
    vec3 F = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 G = textureLod(tex, uv + texelSize * vec2(-2.0f, 0.0f), lod).rgb;
    vec3 H = textureLod(tex, uv + texelSize * vec2(0.0f, 2.0f), lod).rgb;
    vec3 I = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 J = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 K = textureLod(tex, uv + texelSize * vec2(2.0f, 0.0f), lod).rgb;
    vec3 L = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 M = textureLod(tex, uv + texelSize * vec2(0.0f, -2.0f), lod).rgb;

    // Weights
    vec3 result = vec3(0.0);
    // Inner box
    result += (B + C + D + E) * 0.5f;
    // Bottom-left box
    result += (F + G + A + M) * 0.125f;
    // Top-left box
    result += (G + H + I + A) * 0.125f;
    // Top-right box
    result += (A + I + J + K) * 0.125f;
    // Bottom-right box
    result += (M + A + K + L) * 0.125f;

    // 4 samples each
    result *= 0.25f;

    return result;
} 

vec3 UpsampleTent9(sampler2D tex, float lod, vec2 uv, vec2 texelSize, float radius)
{
    vec4 offset = texelSize.xyxy * vec4(1.0f, 1.0f, -1.0f, 0.0f) * radius;

    // Center
    vec3 result = textureLod(tex, uv, lod).rgb * 4.0f;

    result += textureLod(tex, uv - offset.xy, lod).rgb;
    result += textureLod(tex, uv - offset.wy, lod).rgb * 2.0;
    result += textureLod(tex, uv - offset.zy, lod).rgb;

    result += textureLod(tex, uv + offset.zw, lod).rgb * 2.0;
    result += textureLod(tex, uv + offset.xw, lod).rgb * 2.0;

    result += textureLod(tex, uv + offset.zy, lod).rgb;
    result += textureLod(tex, uv + offset.wy, lod).rgb * 2.0;
    result += textureLod(tex, uv + offset.xy, lod).rgb;

    return result * (1.0f / 16.0f);
}

layout(local_size_x = 4, local_size_y = 4) in;
void main()
{
    vec2 imgSize = vec2(imageSize(o_image));
    vec2 uv = gl_GlobalInvocationID.xy / imgSize;
    uv += (1.0f / imgSize) * 0.5f;

    vec2 texSize = vec2(textureSize(u_colorTexture, int(lod))); 
    vec4 color = vec4(1, 0, 1, 1);
    if(mode == MODE_PREFILTER) {
        color.rgb = DownsampleBox13(u_colorTexture, 0, uv, 1.0f / texSize);
        color = Prefilter(color, uv);
        color.a = 1.0f;
    } else if (mode == MODE_UPSAMPLE_FIRST) {
        vec2 bloomTexSize = vec2(textureSize(u_colorTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampledTexture = UpsampleTent9(u_colorTexture, lod + 1.0f, uv, 1.0f / bloomTexSize, sampleScale);

        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampledTexture;
    } else if (mode == MODE_UPSAMPLE) {
        vec2 bloomTexSize = vec2(textureSize(u_bloomTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 upsampledTexture = UpsampleTent9(u_bloomTexture, lod + 1.0f, uv, 1.0f / bloomTexSize, sampleScale);

        vec3 existing = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = existing + upsampledTexture;
    } else if (mode == MODE_DOWNSAMPLE) {
        color.rgb = DownsampleBox13(u_colorTexture, lod, uv, 1.0f / texSize);
    }

    imageStore(o_image, ivec2(gl_GlobalInvocationID), color);
}

so, as I said before the textures used in this bloom pass are created as the application startup. Let's say they're global for simplicity

OpenGLTexture2D* filteredImage;
OpenGLTexture2D* downsampledStagingImage;
OpenGLTexture2D* downsampledImage;
OpenGLTexture2D* upsampledImage;

static constexpr uint32_t bloomComputeWorkgroupSize = 4;

Vector2<uint32_t> bloomViewport;

void Init(const Vector2<uint32_t>& viewport)
{
    bloomViewport = sceneViewport / 2U;
    bloomViewport += bloomComputeWorkgroupSize - (bloomViewport % bloomComputeWorkgroupSize);

    OpenGLTexture2D::CreateInfo bloomImageInfo;
    bloomImageInfo.w = bloomViewport.x;
    bloomImageInfo.h = bloomViewport.y;
    bloomImageInfo.parameters = TextureParameters().
        Format(TextureFormat::RGBA32_FLOAT).
        Wrap(TextureWrap::Clamp);

    filteredImage = new OpenGLTexture(bloomImageInfo);
    downsampledStagingImage = new OpenGLTexture(bloomImageInfo);
    downsampledImage = new OpenGLTexture(bloomImageInfo);
    upsampledImage = new OpenGLTexture(bloomImageInfo);
}

and then every frame I execute 3 passes: filter pass, downscaling pass and upscaling pass

float threshold = 1.0f;
float knee = 0.1f;

while(true) {
    OpenGLTexture* colorImage = RenderScene();

    auto shader = ShaderCache::getShader("Bloom");

    float lod = 0.0f;

    shader->setUniform("mode", 0); // 0 means prefiltering
    shader->setUniform("threshold", threshold);
    shader->setUniform("knee", knee);
    shader->setUniform("lod", lod);
    shader->Bind();

    filteredImage->BindImage(0, 0, GL_WRITE_ONLY, GL_RGBA32F);
    colorImage->Bind(1);
    //bind whatever, prefilter mode does not use the second slot
    colorImage->Bind(2);     
    
    auto mipSize = filteredImage->getMipSize(0);
    uint32_t workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
    uint32_t workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

    glDispatchCompute(workGroupsX, workGroupsY, 1);
    glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

    //downsampling
    shader->setUniform("mode", 1); //1 means downsampling
    uint32_t mips = filteredImage->getMipCount() - 2;
    for (uint32_t i = 1; i < mips; i++) {
        mipSize = filteredImage->getMipSize(i);
        workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
        workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

        {
            downsampledStagingImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);

            filteredImage->Bind(1);

            lod = (float)i - 1.0f;
            shader->setUniform("lod", lod);
            shader->Bind();
            glDispatchCompute(workGroupsX, workGroupsY, 1);
        }

        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

        {
            downsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);

            downsampledStagingImage->Bind(1);

            lod = (float)i;
            shader->setUniform("lod", lod);
            shader->Bind();
            glDispatchCompute(workGroupsX, workGroupsY, 1);
        }

        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    }

    //upsampling
    mips -= 2;
    lod--;
    shader->setUniform("mode", 2); //upsample first
   
    upsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
    downsampledImage->Bind(1);
    colorImage->Bind(2);
    
    shader->setUniform("lod", lod);
    shader->Bind();
   
    mipSize = upsampledImage->getMipSize(mips);
    workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
    workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

    glDispatchCompute(workGroupsX, workGroupsY, 1);
    glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

    shader->setUniform("mode", 2); //upsample in a loop
    for (int32_t mip = mips - 3; mip >= 0; mip--) {
        mipSize = upsampledImage->getMipSize(mip);
        workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
        workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);

        upsampledImage->BindImage(0, mip, GL_WRITE_ONLY, GL_RGBA32F);

        filteredImage->Bind(1);
        upsampledImage->Bind(2);

        shader->setUniform("lod", (float)mip);
        shader->Bind();

        glDispatchCompute(workGroupsX, workGroupsY, 1);          
        glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    }
}

Solution

  • The whole question seems to based on a wrong premise: glGenerateMipmaps is not only responsible for allocating the memory for the mipmaps, but it also calculates the downsampled content of them.

    Since your code writes new data to the texture in every frame, and you read from the mipmaps, the new downsampled representations also have to be calculated in each frame.