I'm implementing the bloom effect using a compute shader.
If I generate mipmaps right after the creation of the textures, bloom does not work. If I generate mipmaps in a loop where the bloom is calculated it works.
I don't want mipmaps to be generated every frame.
My question is: why does the bloom not work if I generate mipmaps only when the textures are created? Why do I have to generate mips every frame?
To simplify things my viewport size is always the same: 1600x900. C++/glsl code might be kinda long (but it's right after the pseudocode anyways), maybe you can spot the problem only using the pseudocode:
// Startup: every image is created once and GenerateMipmaps() is called once on it.
colorImage = Create(screenW, screenH);
colorImage.GenerateMipmaps();
prefilteredImage = Create(bloomW, bloomH);
prefilteredImage.GenerateMipmaps();
downsampledStagingImage = Create(bloomW, bloomH);
downsampledStagingImage.GenerateMipmaps();
downsampledImage = Create(bloomW, bloomH);
downsampledImage.GenerateMipmaps();
upsampledImage = Create(bloomW, bloomH);
upsampledImage.GenerateMipmaps();
// Per frame: render the scene, run the three bloom passes, then composite.
// No GenerateMipmaps() call happens inside the loop in this variant.
while(true) {
    colorImage = RenderScene();
    PrefilterBloomPass();
    DownsamplingBloomPass();
    UpsamplingBloomPass();
    CompositeColorImageWithBloom();
}
mips generated every frame pseudocode:
// Variant in which GenerateMipmaps() is called on each bloom image every frame,
// immediately before the pass that follows it in the loop.
//textures created same as before
while(true) {
colorImage = RenderScene();
// Called right before the prefilter pass.
prefilteredImage.GenerateMipmaps();
PrefilterBloomPass();
// Both downsample images are regenerated right before the downsampling pass.
downsampledStagingImage.GenerateMipmaps();
downsampledImage.GenerateMipmaps();
DownsamplingBloomPass();
// Called right before the upsampling pass.
upsampledImage.GenerateMipmaps();
UpsamplingBloomPass();
CompositeColorImageWithBloom();
}
result - bloom works:
Here's the C++/GLSL implementation. I've got a class that handles it:
I don't think the error is related to this class, but maybe you'll spot something
/// Wrapper around one OpenGL texture object created with glCreateTextures and
/// given immutable storage with glTextureStorage2D.
///
/// NOTE(review): no destructor is visible in this snippet, so nothing here
/// releases glHandle with glDeleteTextures — confirm against the full class.
struct OpenGLTexture
{
    /// Creation parameters: base-level size in texels plus format/wrap settings.
    struct CreateInfo
    {
        uint32_t w = 0;
        uint32_t h = 0;
        //TextureParameters is not related to this question so I'll just skip the implementation
        TextureParameters parameters;
    };

    /// Creates the texture, allocates `miplevel` levels of storage and applies
    /// the wrap/filter parameters that are non-zero.
    OpenGLTexture(const CreateInfo& info)
    {
        const uint32_t format = ConvertFormatToOpenGL(info.parameters.format);
        const uint32_t internalFormat = ConvertInternalFormatToOpenGL(info.parameters.format);
        const uint32_t type = ConvertTextureFormatToOpenGLDataType(info.parameters.format);
        //... miplevel, format, wrap variables
        glCreateTextures(target, 1, &glHandle);
        // The size comes from the CreateInfo that was passed in.
        glTextureStorage2D(glHandle, miplevel, internalFormat, info.w, info.h);
        if (wrap != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_S, wrap);
            glTextureParameteri(glHandle, GL_TEXTURE_WRAP_T, wrap);
        }
        if (filterMag != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MAG_FILTER, filterMag);
        }
        if (filterMin != 0) {
            glTextureParameteri(glHandle, GL_TEXTURE_MIN_FILTER, filterMin);
        }
        if (miplevel) {
            glTextureParameteri(glHandle, GL_TEXTURE_BASE_LEVEL, 0);
            // `miplevel` is a level COUNT (it is what glTextureStorage2D received),
            // while GL_TEXTURE_MAX_LEVEL is the INDEX of the last level.
            glTextureParameteri(glHandle, GL_TEXTURE_MAX_LEVEL, miplevel - 1);
            // Fills levels 1..N from whatever level 0 contains at this moment.
            glGenerateTextureMipmap(glHandle);
        }
    }

    /// Recomputes every mip level from the current contents of the base level.
    void GenerateMipmap() const
    {
        glGenerateTextureMipmap(glHandle);
    }

    /// Binds the whole texture to texture unit `bindingPoint` for sampler access.
    void Bind(uint32_t bindingPoint)
    {
        glBindTextureUnit(bindingPoint, glHandle);
    }

    /// Binds a single mip `level` (non-layered, layer 0) to image unit
    /// `bindingPoint` for imageLoad/imageStore access.
    void BindImage(uint32_t bindingPoint, uint32_t level, uint32_t access, uint32_t format)
    {
        glBindImageTexture(bindingPoint, glHandle, level, GL_FALSE, 0, access, format);
    }

    GLuint glHandle = 0;
};
my bloom shader uses the sampling method described in the COD Advanced Warfare article, I guess? It looks like this
#version 460 core
// Bloom compute shader interface.
// Image unit 0: the mip level written by this dispatch (write-only, rgba32f).
layout(binding = 0, rgba32f) restrict writeonly uniform image2D o_image;
// Texture unit 1: sampled by every mode.
layout(binding = 1) uniform sampler2D u_colorTexture;
// Texture unit 2: only sampled in MODE_UPSAMPLE.
layout(binding = 2) uniform sampler2D u_bloomTexture;
// Lower bound for the brightness divisor in QuadraticThreshold.
const float Epsilon = 1.0e-4;
// threshold / knee: inputs of the prefilter's soft threshold curve.
layout(location = 0) uniform float threshold;
layout(location = 1) uniform float knee;
// Mip level passed to textureLod / textureSize by main().
layout(location = 2) uniform float lod;
// Selects which branch of main() runs; one of the MODE_* values below.
layout(location = 3) uniform int mode;
#define MODE_PREFILTER 0
#define MODE_DOWNSAMPLE 1
#define MODE_UPSAMPLE_FIRST 2
#define MODE_UPSAMPLE 3
// Scales `color` by a soft-knee threshold factor.
//   color     - input colour; all four components are scaled by the same factor
//   threshold - brightness below which the hard part of the curve contributes nothing
//   curve     - x: start of the knee, y: clamp limit of the knee, z: quadratic scale
// Brightness is the largest of the r, g, b components.
vec4 QuadraticThreshold(vec4 color, float threshold, vec3 curve)
{
    float peak = max(max(color.r, color.g), color.b);

    // Quadratic segment: distance into the knee, clamped, squared and scaled.
    float soft = clamp(peak - curve.x, 0.0, curve.y);
    soft = (soft * soft) * curve.z;

    // Epsilon keeps the divisor away from zero for black pixels.
    float gain = max(soft, peak - threshold) / max(peak, Epsilon);
    return color * gain;
}
// Clamps `color` to a maximum of 20 per component and applies the soft
// threshold built from the `threshold` and `knee` uniforms.
// `uv` is accepted but not used.
vec4 Prefilter(vec4 color, vec2 uv)
{
    const float maxComponent = 20.0f;
    // (knee start, knee clamp limit, quadratic scale) for QuadraticThreshold.
    vec3 kneeCurve = vec3(threshold - knee, knee * 2.0f, 0.25f / knee);

    vec4 limited = min(vec4(maxComponent), color);
    return QuadraticThreshold(limited, threshold, kneeCurve);
}
// 13-tap downsample filter: one centre tap, four inner taps and eight outer
// taps, combined as five overlapping 2x2 boxes.
//   tex       - texture to sample
//   lod       - mip level of `tex` to read
//   uv        - sample position in normalized coordinates
//   texelSize - size of one texel of the sampled level in uv units
//
// Tap layout in units of the halved texelSize (+y is "top" in the comments):
//
//   I . H . J      (+2)
//   . C . D .      (+1)
//   G . A . K      ( 0)
//   . B . E .      (-1)
//   F . M . L      (-2)
//
// Weights: inner box 0.5, each corner box 0.125, every box averaged over its
// four samples; the total weight is 1.
vec3 DownsampleBox13(sampler2D tex, float lod, vec2 uv, vec2 texelSize)
{
    // Center
    vec3 A = textureLod(tex, uv, lod).rgb;
    texelSize *= 0.5f; // Sample from center of texels
    // Inner box
    vec3 B = textureLod(tex, uv + texelSize * vec2(-1.0f, -1.0f), lod).rgb;
    vec3 C = textureLod(tex, uv + texelSize * vec2(-1.0f, 1.0f), lod).rgb;
    vec3 D = textureLod(tex, uv + texelSize * vec2(1.0f, 1.0f), lod).rgb;
    vec3 E = textureLod(tex, uv + texelSize * vec2(1.0f, -1.0f), lod).rgb;
    // Outer box: four corners (F, I, J, L) and four edge midpoints (G, H, K, M).
    // I and L previously repeated the offsets of J and F, so the top-left and
    // bottom-right corners were never sampled.
    vec3 F = textureLod(tex, uv + texelSize * vec2(-2.0f, -2.0f), lod).rgb;
    vec3 G = textureLod(tex, uv + texelSize * vec2(-2.0f, 0.0f), lod).rgb;
    vec3 H = textureLod(tex, uv + texelSize * vec2(0.0f, 2.0f), lod).rgb;
    vec3 I = textureLod(tex, uv + texelSize * vec2(-2.0f, 2.0f), lod).rgb;
    vec3 J = textureLod(tex, uv + texelSize * vec2(2.0f, 2.0f), lod).rgb;
    vec3 K = textureLod(tex, uv + texelSize * vec2(2.0f, 0.0f), lod).rgb;
    vec3 L = textureLod(tex, uv + texelSize * vec2(2.0f, -2.0f), lod).rgb;
    vec3 M = textureLod(tex, uv + texelSize * vec2(0.0f, -2.0f), lod).rgb;
    // Weights
    vec3 result = vec3(0.0);
    // Inner box
    result += (B + C + D + E) * 0.5f;
    // Bottom-left box
    result += (F + G + A + M) * 0.125f;
    // Top-left box
    result += (G + H + I + A) * 0.125f;
    // Top-right box (top edge midpoint H, not the top-left corner I)
    result += (A + H + J + K) * 0.125f;
    // Bottom-right box
    result += (M + A + K + L) * 0.125f;
    // 4 samples each
    result *= 0.25f;
    return result;
}
// 3x3 tent filter: centre weight 4, edge neighbours weight 2, corner
// neighbours weight 1, normalized by 16.
//   tex       - texture to sample
//   lod       - mip level of `tex` to read
//   uv        - sample position in normalized coordinates
//   texelSize - size of one texel of the sampled level in uv units
//   radius    - scale applied to the tap spacing
vec3 UpsampleTent9(sampler2D tex, float lod, vec2 uv, vec2 texelSize, float radius)
{
    // Distance between neighbouring taps along each axis.
    vec2 spacing = texelSize * radius;

    // Centre
    vec3 sum = textureLod(tex, uv, lod).rgb * 4.0f;

    // Row below the centre
    sum += textureLod(tex, uv + vec2(-spacing.x, -spacing.y), lod).rgb;
    sum += textureLod(tex, uv + vec2(0.0f, -spacing.y), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(spacing.x, -spacing.y), lod).rgb;

    // Left and right of the centre
    sum += textureLod(tex, uv + vec2(-spacing.x, 0.0f), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(spacing.x, 0.0f), lod).rgb * 2.0;

    // Row above the centre
    sum += textureLod(tex, uv + vec2(-spacing.x, spacing.y), lod).rgb;
    sum += textureLod(tex, uv + vec2(0.0f, spacing.y), lod).rgb * 2.0;
    sum += textureLod(tex, uv + vec2(spacing.x, spacing.y), lod).rgb;

    return sum * (1.0f / 16.0f);
}
layout(local_size_x = 4, local_size_y = 4) in;
// One invocation per texel of o_image. The `mode` uniform selects the pass:
// prefilter, downsample, first upsample (both levels read from u_colorTexture)
// or upsample (coarser level read from u_bloomTexture).
// An unrecognized mode stores the initial magenta colour.
void main()
{
    vec2 outputSize = vec2(imageSize(o_image));
    // Normalized coordinate of the centre of the texel being written.
    vec2 uv = vec2(gl_GlobalInvocationID.xy) / outputSize;
    uv += (1.0f / outputSize) * 0.5f;

    vec2 sourceSize = vec2(textureSize(u_colorTexture, int(lod)));
    vec4 color = vec4(1, 0, 1, 1);

    switch (mode) {
    case MODE_PREFILTER: {
        color.rgb = DownsampleBox13(u_colorTexture, 0, uv, 1.0f / sourceSize);
        color = Prefilter(color, uv);
        color.a = 1.0f;
        break;
    }
    case MODE_UPSAMPLE_FIRST: {
        // Coarser level and current level both come from u_colorTexture.
        vec2 coarseSize = vec2(textureSize(u_colorTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 coarse = UpsampleTent9(u_colorTexture, lod + 1.0f, uv, 1.0f / coarseSize, sampleScale);
        vec3 current = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = current + coarse;
        break;
    }
    case MODE_UPSAMPLE: {
        // Coarser level comes from u_bloomTexture, current level from u_colorTexture.
        vec2 coarseSize = vec2(textureSize(u_bloomTexture, int(lod + 1.0f)));
        float sampleScale = 1.0f;
        vec3 coarse = UpsampleTent9(u_bloomTexture, lod + 1.0f, uv, 1.0f / coarseSize, sampleScale);
        vec3 current = textureLod(u_colorTexture, uv, lod).rgb;
        color.rgb = current + coarse;
        break;
    }
    case MODE_DOWNSAMPLE: {
        color.rgb = DownsampleBox13(u_colorTexture, lod, uv, 1.0f / sourceSize);
        break;
    }
    default:
        break;
    }

    imageStore(o_image, ivec2(gl_GlobalInvocationID), color);
}
So, as I said before, the textures used in this bloom pass are created at application startup. Let's say they're global for simplicity:
OpenGLTexture2D* filteredImage;
OpenGLTexture2D* downsampledStagingImage;
OpenGLTexture2D* downsampledImage;
OpenGLTexture2D* upsampledImage;
static constexpr uint32_t bloomComputeWorkgroupSize = 4;
Vector2<uint32_t> bloomViewport;
void Init(const Vector2<uint32_t>& viewport)
{
bloomViewport = sceneViewport / 2U;
bloomViewport += bloomComputeWorkgroupSize - (bloomViewport % bloomComputeWorkgroupSize);
OpenGLTexture2D::CreateInfo bloomImageInfo;
bloomImageInfo.w = bloomViewport.x;
bloomImageInfo.h = bloomViewport.y;
bloomImageInfo.parameters = TextureParameters().
Format(TextureFormat::RGBA32_FLOAT).
Wrap(TextureWrap::Clamp);
filteredImage = new OpenGLTexture(bloomImageInfo);
downsampledStagingImage = new OpenGLTexture(bloomImageInfo);
downsampledImage = new OpenGLTexture(bloomImageInfo);
upsampledImage = new OpenGLTexture(bloomImageInfo);
}
and then every frame I execute 3 passes: filter pass, downscaling pass and upscaling pass
float threshold = 1.0f;
float knee = 0.1f;
while(true) {
OpenGLTexture* colorImage = RenderScene();
auto shader = ShaderCache::getShader("Bloom");
float lod = 0.0f;
shader->setUniform("mode", 0); // 0 means prefiltering
shader->setUniform("threshold", threshold);
shader->setUniform("knee", knee);
shader->setUniform("lod", lod);
shader->Bind();
filteredImage->BindImage(0, 0, GL_WRITE_ONLY, GL_RGBA32F);
colorImage->Bind(1);
//bind whatever, prefilter mode does not use the second slot
colorImage->Bind(2);
auto mipSize = filteredImage->getMipSize(0);
uint32_t workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
uint32_t workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);
glDispatchCompute(workGroupsX, workGroupsY, 1);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
//downsampling
shader->setUniform("mode", 1); //1 means downsampling
uint32_t mips = filteredImage->getMipCount() - 2;
for (uint32_t i = 1; i < mips; i++) {
mipSize = filteredImage->getMipSize(i);
workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);
{
downsampledStagingImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
filteredImage->Bind(1);
lod = (float)i - 1.0f;
shader->setUniform("lod", lod);
shader->Bind();
glDispatchCompute(workGroupsX, workGroupsY, 1);
}
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
{
downsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
downsampledStagingImage->Bind(1);
lod = (float)i;
shader->setUniform("lod", lod);
shader->Bind();
glDispatchCompute(workGroupsX, workGroupsY, 1);
}
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
//upsampling
mips -= 2;
lod--;
shader->setUniform("mode", 2); //upsample first
upsampledImage->BindImage(0, i, GL_WRITE_ONLY, GL_RGBA32F);
downsampledImage->Bind(1);
colorImage->Bind(2);
shader->setUniform("lod", lod);
shader->Bind();
mipSize = upsampledImage->getMipSize(mips);
workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);
glDispatchCompute(workGroupsX, workGroupsY, 1);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
shader->setUniform("mode", 2); //upsample in a loop
for (int32_t mip = mips - 3; mip >= 0; mip--) {
mipSize = upsampledImage->getMipSize(mip);
workGroupsX = (uint32_t)glm::ceil((float)mipSize.x / (float)bloomComputeWorkgroupSize);
workGroupsY = (uint32_t)glm::ceil((float)mipSize.y / (float)bloomComputeWorkgroupSize);
upsampledImage->BindImage(0, mip, GL_WRITE_ONLY, GL_RGBA32F);
filteredImage->Bind(1);
upsampledImage->Bind(2);
shader->setUniform("lod", (float)mip);
shader->Bind();
glDispatchCompute(workGroupsX, workGroupsY, 1);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
}
The whole question seems to be based on a wrong premise: glGenerateMipmap (glGenerateTextureMipmap in your code)
is not only responsible for allocating the memory for the mipmaps, but it also calculates the downsampled content of them.
Since your code writes new data to the texture in every frame, and you read from the mipmaps, the new downsampled representations also have to be calculated in each frame.