c++glslvulkanantialiasingpost-processing

SMAA weight blend texture is unclear


I am trying to use SMAA with Vulkan, but the output image is not fully anti-aliased.

like this:

SMAA image

The edgesTex looks correct, but the blendTex is not as clear as the weight image produced by smaaDemo.

edgesTex

edgesTex

blendTex

blendTex

weight image from smaaDemo

smaaDemo's blendTex

So I suspect the problem is in the weight blend pass, but I've tried everything I can and it doesn't change anything.

Here is a minimal version of my code.

Here are the shaders. I use SMAA.hlsl from iryoku/smaa.

SMAA_define.h
//
// SMAA common header file
//
// Configuration shared by every SMAA vertex and fragment shader.
// Must be included before SMAA.hlsl so the macros below are visible to it.
//

// push constant, use for smaa render target metrics
layout(push_constant) uniform PushConstant
{
    vec4 smaa_rt_metrics; // the host fills this with (1/width, 1/height, width, height)
} pc;

#define SMAA_RT_METRICS pc.smaa_rt_metrics
#define SMAA_GLSL_4
#define SMAA_PRESET_ULTRA
// The GLSL path of SMAA.hlsl flips the vertical axis by default. These shaders sample with
// Vulkan's top-left-origin texture coordinates, so the flip must be disabled; otherwise the
// edge texture comes out vertically mirrored and the blend weights are wrong.
#define SMAA_FLIP_Y 0
SMAA_vert.h
//
// SMAA vertex shader common define
//
// Shared prologue of the three SMAA vertex shaders: configuration, the vertex-shader
// half of SMAA.hlsl, the stage outputs and the fullscreen-triangle helper.
//

#include "SMAA_define.h"

#define SMAA_INCLUDE_PS 0
#include "SMAA.hlsl"

layout(location = 0) out vec2 texture_coord;
layout(location = 1) out vec2 pixel_coord;
layout(location = 2) out vec4 offset[3];

//
// fullscreen triangle: one oversized triangle whose corners lie outside clip space
//
vec2 vertices[] = vec2[](
    vec2(-1, -1),
    vec2( 3, -1),
    vec2(-1,  3)
);

// texture coordinate of each corner; the on-screen part of the triangle spans [0, 1]
vec2 texture_coords[] = vec2[](
    vec2(0, 0),
    vec2(2, 0),
    vec2(0, 2)
);

// Writes gl_Position and texture_coord for the corner selected by gl_VertexIndex.
void set_gl_Position_and_texture_coord()
{
    int corner    = gl_VertexIndex;
    texture_coord = texture_coords[corner];
    gl_Position   = vec4(vertices[corner], 0, 1);
}
SMAA_frag.h
//
// SMAA fragment shader common define
//
// Shared prologue of the three SMAA fragment shaders: configuration, the pixel-shader
// half of SMAA.hlsl and the stage interface.
//

#include "SMAA_define.h"

// leave the SMAA vertex shader functions out of this stage
#define SMAA_INCLUDE_VS 0
#include "SMAA.hlsl"

// stage inputs; locations mirror the outputs declared in SMAA_vert.h
layout(location = 0) in vec2 texture_coord;
layout(location = 1) in vec2 pixel_coord;
layout(location = 2) in vec4 offset[3];

// result of the current pass, written to the single color attachment
layout(location = 0) out vec4 color;
SMAA_edge_detection.vert
#version 460

#include "SMAA_vert.h"

// Edge detection pass, vertex stage.
void main()
{
    // must run first: SMAAEdgeDetectionVS reads texture_coord to fill the offsets
    set_gl_Position_and_texture_coord();
    SMAAEdgeDetectionVS(texture_coord, offset);
}
SMAA_edge_detection.frag
#version 460

#include "SMAA_frag.h"

layout(binding = 0) uniform sampler2D original_image;

// Edge detection pass, fragment stage: the two edge values returned by SMAA go into
// the red/green channels, blue and alpha stay zero.
void main()
{
    vec2 edges = SMAAColorEdgeDetectionPS(texture_coord, offset, original_image);
    color = vec4(edges, 0.0, 0.0);
}
SMAA_blend_weight.vert
#version 460

#include "SMAA_vert.h"

// Blending weight calculation pass, vertex stage.
void main()
{
    // must run first: the SMAA function below reads texture_coord to fill pixel_coord and the offsets
    set_gl_Position_and_texture_coord();
    SMAABlendingWeightCalculationVS(texture_coord, pixel_coord, offset);
}
SMAA_blend_weight.frag
#version 460

#include "SMAA_frag.h"

layout(binding = 1) uniform sampler2D edges_texture;
layout(binding = 2) uniform sampler2D area_texture;
layout(binding = 3) uniform sampler2D search_texture;

// Blending weight calculation pass, fragment stage.
void main()
{
    // last argument of the SMAA function; all zeros here
    const vec4 subsample_indices = vec4(0.0);

    color = SMAABlendingWeightCalculationPS(
        texture_coord,
        pixel_coord,
        offset,
        edges_texture,
        area_texture,
        search_texture,
        subsample_indices);
}
SMAA_neighbor.vert
#version 460

#include "SMAA_vert.h"

// Neighborhood blending pass, vertex stage.
void main()
{
    // must run first: the SMAA function below reads texture_coord
    set_gl_Position_and_texture_coord();
    // this pass only needs a single offset vector, so only offset[0] is filled
    SMAANeighborhoodBlendingVS(texture_coord, offset[0]);
}
SMAA_neighbor.frag
#version 460

#include "SMAA_frag.h"

layout(binding = 0) uniform sampler2D original_image;
layout(binding = 4) uniform sampler2D blend_texture;

// Neighborhood blending pass, fragment stage: combines the original image with the
// blend weights produced by the previous pass.
void main()
{
    vec4 blended = SMAANeighborhoodBlendingPS(texture_coord, offset[0], original_image, blend_texture);
    color = blended;
}

Shaders used to render the non-anti-aliased test image: a triangle rotated 45 degrees clockwise.

triangle.vert
#version 460

// corners of the test triangle in clip space
vec2 vertices[] =
{
  { 0, -.5 },
  { .5, .5 },
  { -.5, .5 },
};

// Returns the 2D rotation matrix for an angle given in degrees.
mat2 rotate(float degree)
{
  // cos() and sin() expect radians, so convert the angle first
  float angle = radians(degree);
  return mat2
  (
    cos(angle), sin(angle),
    -sin(angle), cos(angle)
  );
}

void main()
{
  gl_Position = vec4(rotate(45.0) * vertices[gl_VertexIndex], 0, 1);
}
triangle.frag
#version 460

layout(location = 0) out vec4 color;

// Fills the test triangle with opaque white.
void main()
{
  color = vec4(1.0);
}

main.cpp, which contains all code we need to test the program.

It uses SDL3, basic Vulkan initialization and rendering code, and VMA to allocate memory.

All external libraries, with the specific tags used, are listed in CMakeLists.txt.

The complete code does not fit here because of the text limit, so the entire code, including all the Vulkan initialization, can be found in the SMAA-test repo. Here I only include the necessary code.

main.cpp
// structure
// An image together with its view, backing allocation and creation parameters.
// Filled in by create_image().
struct Image
{
  VkImage       handle;      // the Vulkan image
  VkImageView   view;        // 2D color view of the image
  VmaAllocation allocation;  // VMA allocation backing the image
  VkFormat      format;      // format the image and its view were created with
  VkExtent3D    extent;      // size of the image; depth is 1 for images made by create_image()
};

// A buffer together with its backing allocation.
// Filled in by create_buffer(), released by destroy().
struct Buffer
{
  VkBuffer      handle;      // the Vulkan buffer
  VmaAllocation allocation;  // VMA allocation backing the buffer
};

// Host-side mirror of the push constant block declared in SMAA_define.h.
struct PushConstant
{
  glm::vec4 smaa_rt_metrics;  // (1/width, 1/height, width, height) of the render target
};

// Per-frame objects; one entry of g_frames is selected by g_frame_index.
struct Frame
{
  VkCommandBuffer cmd;              // command buffer the frame's work is recorded into
  VkFence         fence;            // NOTE(review): usage not shown here — presumably guards reuse of this frame; confirm
  VkSemaphore     image_available;  // NOTE(review): presumably signalled on swapchain image acquisition; confirm
  VkSemaphore     render_finished;  // NOTE(review): presumably waited on by presentation; confirm
};

// global variables
std::vector<Frame> g_frames;           // per-frame objects
uint32_t           g_frame_index = 0;  // index into g_frames of the frame being recorded

VmaAllocator  g_allocator;               // allocator used for every image and buffer
VkDevice      g_device;
VkFormat      g_swapchain_image_format;  // also the format of every SMAA pass image and pipeline attachment
VkExtent2D    g_swapchain_extent;        // also the size of every SMAA pass image
VkCommandPool g_command_pool;
// every pass output image
Image g_source_image;  // original image
Image g_edges_image;  // edge detection
Image g_blend_image;  // weight blend
Image g_output_image; // neighbor
VkSampler g_sampler;  // linear sampler
// one descriptor set with five combined image samplers, shared by all SMAA passes
VkDescriptorPool      g_descriptor_pool;
VkDescriptorSetLayout g_descriptor_set_layout;
VkDescriptorSet       g_descriptor_set;
// use for blend weight pass
Image g_area_texture;
Image g_search_texture;

std::vector<VkPipeline>  g_pipelines(4);  // 0 : triangle, 1 : edge detection, 2 : blend weight, 3 : neighbor
VkPipelineLayout         g_pipeline_layout_SMAA;  // all SMAA pass use common pipeline layout

// Creates a 2D, single-mip, single-layer, single-sample, optimally tiled image in its own
// dedicated VMA allocation, plus a color view covering the whole image.
//   format : format of the image and of its view
//   extent : width and height; depth is fixed to 1
//   usage  : Vulkan usage flags for the image
// Returns the filled-in Image. Failures are reported through check_vk.
auto create_image(VkFormat format, VkExtent2D extent, VkImageUsageFlags usage)
{
  Image result{};
  result.format = format;
  result.extent = VkExtent3D{ extent.width, extent.height, 1 };

  // the image itself
  VkImageCreateInfo create_info{};
  create_info.sType       = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  create_info.imageType   = VK_IMAGE_TYPE_2D;
  create_info.format      = result.format;
  create_info.extent      = result.extent;
  create_info.mipLevels   = 1;
  create_info.arrayLayers = 1;
  create_info.samples     = VK_SAMPLE_COUNT_1_BIT;
  create_info.tiling      = VK_IMAGE_TILING_OPTIMAL;
  create_info.usage       = usage;

  VmaAllocationCreateInfo allocation_info{};
  allocation_info.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
  allocation_info.usage = VMA_MEMORY_USAGE_AUTO;

  check_vk(vmaCreateImage(g_allocator, &create_info, &allocation_info, &result.handle, &result.allocation, nullptr));

  // a view over the single mip level and array layer
  VkImageViewCreateInfo view_info{};
  view_info.sType                       = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
  view_info.image                       = result.handle;
  view_info.viewType                    = VK_IMAGE_VIEW_TYPE_2D;
  view_info.format                      = result.format;
  view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  view_info.subresourceRange.levelCount = 1;
  view_info.subresourceRange.layerCount = 1;

  check_vk(vkCreateImageView(g_device, &view_info, nullptr, &result.view));

  return result;
}

void create_SMAA_images()
{
  g_source_image = create_image(g_swapchain_image_format, g_swapchain_extent, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
  g_edges_image  = create_image(g_swapchain_image_format, g_swapchain_extent, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
  g_blend_image  = create_image(g_swapchain_image_format, g_swapchain_extent, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
  g_output_image = create_image(g_swapchain_image_format, g_swapchain_extent, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
}

void create_sampler()
{
  VkSamplerCreateInfo sampler_info
  {
    .sType        = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
    .magFilter    = VK_FILTER_LINEAR,
    .minFilter    = VK_FILTER_LINEAR,
    .mipmapMode   = VK_SAMPLER_MIPMAP_MODE_LINEAR,
    .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
  };
  check_vk(vkCreateSampler(g_device, &sampler_info, nullptr, &g_sampler));
}

void create_descriptor_resource()
{
  // create descriptor pool
  VkDescriptorPoolSize pool_size
  {
    .type            = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    .descriptorCount = 5,
  };
  VkDescriptorPoolCreateInfo pool_info
  {
    .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
    .maxSets       = 1,
    .poolSizeCount = 1,
    .pPoolSizes    = &pool_size,
  };
  check_vk(vkCreateDescriptorPool(g_device, &pool_info, nullptr, &g_descriptor_pool));

  // create descriptor set layout
  VkDescriptorSetLayoutBinding bindings[5]
  {
    { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
    { .binding = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
    { .binding = 2, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
    { .binding = 3, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
    { .binding = 4, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
  };
  VkDescriptorSetLayoutCreateInfo layout_info
  {
    .sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
    .bindingCount = 5,
    .pBindings    = bindings,
  };
  check_vk(vkCreateDescriptorSetLayout(g_device, &layout_info, nullptr, &g_descriptor_set_layout));

  // allocate descriptor set
  VkDescriptorSetAllocateInfo alloc_info
  {
    .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
    .descriptorPool     = g_descriptor_pool,
    .descriptorSetCount = 1,
    .pSetLayouts        = &g_descriptor_set_layout,
  };
  check_vk(vkAllocateDescriptorSets(g_device, &alloc_info, &g_descriptor_set));

  // update descriptor set
  std::vector<VkDescriptorImageInfo> image_infos
  {
    { .sampler = g_sampler, .imageView = g_source_image.view,   .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
    { .sampler = g_sampler, .imageView = g_edges_image.view,    .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
    { .sampler = g_sampler, .imageView = g_area_texture.view,   .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
    { .sampler = g_sampler, .imageView = g_search_texture.view, .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
    { .sampler = g_sampler, .imageView = g_blend_image.view,    .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
  };
  std::vector<VkWriteDescriptorSet> write_infos(5);
  for (size_t i = 0; i < image_infos.size(); ++i)
  {
    write_infos[i] = 
    {
      .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet          = g_descriptor_set,
      .dstBinding      = static_cast<uint32_t>(i),
      .descriptorCount = 1,
      .descriptorType  = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
      .pImageInfo      = &image_infos[i],
    };
  }
  vkUpdateDescriptorSets(g_device, static_cast<uint32_t>(write_infos.size()), write_infos.data(), 0, nullptr);
}

void create_SMAA_pipeline_layout()
{
  VkPushConstantRange push_constant_range
  {
    .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
    .size       = sizeof(PushConstant),
  };
  VkPipelineLayoutCreateInfo layout_info
  {
    .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    .setLayoutCount         = 1,
    .pSetLayouts            = &g_descriptor_set_layout,
    .pushConstantRangeCount = 1,
    .pPushConstantRanges    = &push_constant_range,
  };
  check_vk(vkCreatePipelineLayout(g_device, &layout_info, nullptr, &g_pipeline_layout_SMAA));
}

auto create_buffer(uint32_t size, VkBufferUsageFlags usages, VmaAllocationCreateFlags flags)
{
  Buffer buffer;

  VkBufferCreateInfo buf_info
  {
    .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    .size  = size,
    .usage = usages,
  };
  VmaAllocationCreateInfo alloc_info
  {
    .flags = flags,
    .usage = VMA_MEMORY_USAGE_AUTO,
  };
  check_vk(vmaCreateBuffer(g_allocator, &buf_info, &alloc_info, &buffer.handle, &buffer.allocation, nullptr));

  return buffer;
}

// Records a full pipeline barrier that moves the first mip level / array layer of a color
// image from old_layout to new_layout.
//   cmd        : command buffer in the recording state
//   image      : image to transition
//   old_layout : layout the image is currently in (VK_IMAGE_LAYOUT_UNDEFINED discards its contents)
//   new_layout : layout the image must be in for the commands recorded afterwards
void transform_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout old_layout, VkImageLayout new_layout)
{
  VkImageMemoryBarrier barrier
  {
    .sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    // Without access masks the barrier is only an execution dependency: earlier attachment or
    // transfer writes would not be made available before the transition, nor visible to the
    // reads that follow. Cover every prior write and every later access to match the
    // ALL_COMMANDS stage masks below.
    .srcAccessMask       = VK_ACCESS_MEMORY_WRITE_BIT,
    .dstAccessMask       = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
    .oldLayout           = old_layout,
    .newLayout           = new_layout,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .image               = image,
    .subresourceRange    = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 },
  };
  vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier);
}

// Releases a buffer and its VMA allocation, then resets the wrapper to its empty state.
// The buffer must currently own both a handle and an allocation.
auto destroy(Buffer& buffer)
{
  assert(buffer.handle && buffer.allocation);

  vmaDestroyBuffer(g_allocator, buffer.handle, buffer.allocation);
  buffer = Buffer{};
}

// Uploads the precomputed SMAA area and search textures to the GPU.
// Both textures are copied through a single staging buffer (area data first, search data
// right after it) using a one-time command buffer, and end up in
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL. The function blocks until the upload has finished.
void load_textures()
{
  // create texture images
  g_area_texture   = create_image(VK_FORMAT_R8G8_UNORM, { AREATEX_WIDTH, AREATEX_HEIGHT },     VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
  g_search_texture = create_image(VK_FORMAT_R8_UNORM,   { SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT }, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);

  // create stage buffer
  auto stage_buffer = create_buffer(AREATEX_SIZE + SEARCHTEX_SIZE, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);

  // copy texture data to stage buffer
  check_vk(vmaCopyMemoryToAllocation(g_allocator, areaTexBytes, stage_buffer.allocation, 0, AREATEX_SIZE));
  check_vk(vmaCopyMemoryToAllocation(g_allocator, searchTexBytes, stage_buffer.allocation, AREATEX_SIZE, SEARCHTEX_SIZE));

  // create command buffer to record
  VkCommandBuffer cmd;
  VkCommandBufferAllocateInfo cmd_info
  {
    .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
    .commandPool        = g_command_pool,
    .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
    .commandBufferCount = 1,
  };
  check_vk(vkAllocateCommandBuffers(g_device, &cmd_info, &cmd));

  // begin command buffer
  VkCommandBufferBeginInfo beg_info
  {
    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  };
  // beginning a command buffer can fail, so check it like every other Vulkan call here
  check_vk(vkBeginCommandBuffer(cmd, &beg_info));

  // transform image layout for copy
  transform_image_layout(cmd, g_area_texture.handle,   VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
  transform_image_layout(cmd, g_search_texture.handle, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

  // copy buffer to area texture
  VkBufferImageCopy2 region
  {
    .sType            = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
    .bufferOffset     = 0,
    .imageSubresource =
    {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .layerCount = 1,
    },
    .imageExtent =
    {
      .width  = AREATEX_WIDTH,
      .height = AREATEX_HEIGHT,
      .depth  = 1,
    },
  };
  VkCopyBufferToImageInfo2 copy_info
  {
    .sType          = VK_STRUCTURE_TYPE_COPY_BUFFER_TO_IMAGE_INFO_2,
    .srcBuffer      = stage_buffer.handle,
    .dstImage       = g_area_texture.handle,
    .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
    .regionCount    = 1,
    .pRegions       = &region,
  };
  vkCmdCopyBufferToImage2(cmd, &copy_info);

  // copy buffer to search texture
  // (same region/copy structs reused: the search data starts right after the area data)
  region.bufferOffset       = AREATEX_SIZE;
  region.imageExtent.width  = SEARCHTEX_WIDTH;
  region.imageExtent.height = SEARCHTEX_HEIGHT;
  copy_info.dstImage        = g_search_texture.handle;
  vkCmdCopyBufferToImage2(cmd, &copy_info);

  // transform image layout for shader read
  transform_image_layout(cmd, g_area_texture.handle,   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
  transform_image_layout(cmd, g_search_texture.handle, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

  // end command buffer
  check_vk(vkEndCommandBuffer(cmd));

  // submit command buffer
  VkSubmitInfo submit_info
  {
    .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .commandBufferCount   = 1,
    .pCommandBuffers      = &cmd,
  };
  check_vk(vkQueueSubmit(g_queue, 1, &submit_info, VK_NULL_HANDLE));

  // wait for queue to finish
  // (after this the command buffer and the stage buffer are no longer in use)
  check_vk(vkQueueWaitIdle(g_queue));

  // free command buffer
  vkFreeCommandBuffers(g_device, g_command_pool, 1, &cmd);

  // destroy stage buffer
  destroy(stage_buffer);
}

// Reads a whole binary file into a vector of 32-bit words (used for SPIR-V modules).
//   filename : path of the file; the view is handed to the stream as a C string, so it
//              must refer to a null-terminated string
// Returns the file contents. Goes through exit_if when the file cannot be opened, its size
// cannot be determined or is not a whole number of words, or the read comes up short.
auto get_file_data(std::string_view filename)
{
  std::ifstream file(filename.data(), std::ios::ate | std::ios::binary);
  exit_if(!file.is_open());

  // the stream was opened at its end (std::ios::ate), so the position is the file size
  const std::streamoff end_pos = file.tellg();
  exit_if(end_pos < 0);
  const auto file_size = static_cast<size_t>(end_pos);

  // A SPIR-V module is defined a stream of 32bit words.
  // A size that is not a multiple of the word size would make the read below write past
  // the end of the buffer, so refuse such files.
  exit_if(file_size % sizeof(uint32_t) != 0);
  auto buffer = std::vector<uint32_t>(file_size / sizeof(uint32_t));

  file.seekg(0);
  file.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(file_size));
  exit_if(!file);

  file.close();
  return buffer;
}

auto create_shader_module(std::string_view filename)
{
  auto data = get_file_data(filename);
  VkShaderModuleCreateInfo shader_info
  {
    .sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    .codeSize = data.size() * sizeof(uint32_t),
    .pCode    = reinterpret_cast<uint32_t*>(data.data()),
  };
  VkShaderModule shader_module;
  check_vk(vkCreateShaderModule(g_device, &shader_info, nullptr, &shader_module));
  return shader_module;
}

// Builds a graphics pipeline for dynamic rendering with a single color attachment in the
// swapchain format.
//   pipeline_layout : layout the pipeline is created with
//   vertex_shader   : path of the SPIR-V vertex shader
//   fragment_shader : path of the SPIR-V fragment shader
// Returns the new pipeline. The two shader modules are temporary and destroyed before
// returning. Failures are reported through check_vk.
auto create_pipeline(VkPipelineLayout pipeline_layout, std::string_view vertex_shader, std::string_view fragment_shader) -> VkPipeline
{
  // dynamic rendering
  // (chained into the pipeline create info via pNext)
  VkPipelineRenderingCreateInfo dynamic_rendering_info
  {
    .sType                   = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
    .colorAttachmentCount    = 1,
    .pColorAttachmentFormats = &g_swapchain_image_format,
  };

  // vertex and fragment shaders
  std::vector<VkPipelineShaderStageCreateInfo> shader_stages(2);
  shader_stages[0].sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  shader_stages[0].stage  = VK_SHADER_STAGE_VERTEX_BIT;
  shader_stages[0].module = create_shader_module(vertex_shader);
  shader_stages[0].pName  = "main";
  // start the fragment stage from a copy of the vertex stage, then override what differs
  shader_stages[1] = shader_stages[0];
  shader_stages[1].stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
  shader_stages[1].module = create_shader_module(fragment_shader);

  // rasterization state
  // (back faces are culled and clockwise is front-facing, so all geometry must be wound clockwise)
  VkPipelineRasterizationStateCreateInfo rasterization_state
  {
    .sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
    .polygonMode = VK_POLYGON_MODE_FILL,
    .cullMode    = VK_CULL_MODE_BACK_BIT,
    .frontFace   = VK_FRONT_FACE_CLOCKWISE,
    .lineWidth   = 1.f,
  };

  // dynamic states
  // (viewport and scissor have to be set on the command buffer before drawing)
  auto dynamic_states = std::vector<VkDynamicState>
  {
    VK_DYNAMIC_STATE_VIEWPORT,
    VK_DYNAMIC_STATE_SCISSOR,
  };
  VkPipelineDynamicStateCreateInfo dynamic_state
  {
    .sType             = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
    .dynamicStateCount = static_cast<uint32_t>(dynamic_states.size()),
    .pDynamicStates    = dynamic_states.data(),
  };

  // misc states
  // no vertex buffers: the shaders derive their vertices from gl_VertexIndex
  VkPipelineVertexInputStateCreateInfo vertex_input_state
  {
    .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
  };
  VkPipelineInputAssemblyStateCreateInfo input_assembly_state
  { 
    .sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
    .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
  };
  // only the counts are given; the actual viewport and scissor are dynamic
  VkPipelineViewportStateCreateInfo viewport_state
  { 
    .sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
    .viewportCount = 1,
    .scissorCount  = 1,
  };
  VkPipelineMultisampleStateCreateInfo multisample_state
  {
    .sType                 = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
    .rasterizationSamples  = VK_SAMPLE_COUNT_1_BIT,
    .minSampleShading      = 1.f,
  };
  // left at its defaults: no depth or stencil test is enabled
  VkPipelineDepthStencilStateCreateInfo depth_stencil_state
  {
    .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
  };
  // blending stays disabled; all four channels are written
  VkPipelineColorBlendAttachmentState color_blend_attachment
  {
    .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
                      VK_COLOR_COMPONENT_G_BIT |
                      VK_COLOR_COMPONENT_B_BIT |
                      VK_COLOR_COMPONENT_A_BIT,
  };
  VkPipelineColorBlendStateCreateInfo color_blend_state
  { 
    .sType           = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
    .attachmentCount = 1,
    .pAttachments    = &color_blend_attachment,
  };

  // create pipeline
  VkPipeline pipeline;
  VkGraphicsPipelineCreateInfo info
  {
    .sType               = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
    .pNext               = &dynamic_rendering_info,
    .stageCount          = static_cast<uint32_t>(shader_stages.size()),
    .pStages             = shader_stages.data(),
    .pVertexInputState   = &vertex_input_state,
    .pInputAssemblyState = &input_assembly_state,
    .pViewportState      = &viewport_state,
    .pRasterizationState = &rasterization_state,
    .pMultisampleState   = &multisample_state,
    .pDepthStencilState  = &depth_stencil_state,
    .pColorBlendState    = &color_blend_state,
    .pDynamicState       = &dynamic_state,
    .layout              = pipeline_layout,
  };
  // no pipeline cache is used (second argument)
  check_vk(vkCreateGraphicsPipelines(g_device, nullptr, 1, &info, nullptr, &pipeline));

  // destroy shader modules
  vkDestroyShaderModule(g_device, shader_stages[0].module, nullptr);
  vkDestroyShaderModule(g_device, shader_stages[1].module, nullptr);

  return pipeline;
}

// Builds the three SMAA pipelines into g_pipelines[1..3] (edge detection, blend weight,
// neighborhood blending). All of them share g_pipeline_layout_SMAA.
void create_SMAA_pipelines()
{
  struct ShaderPair
  {
    const char* vertex;
    const char* fragment;
  };

  // in pass order; entry i ends up in g_pipelines[i + 1]
  const ShaderPair passes[]
  {
    { "SMAA_edge_detection_vert.spv", "SMAA_edge_detection_frag.spv" },
    { "SMAA_blend_weight_vert.spv",   "SMAA_blend_weight_frag.spv"   },
    { "SMAA_neighbor_vert.spv",       "SMAA_neighbor_frag.spv"       },
  };

  const size_t pass_count = sizeof(passes) / sizeof(passes[0]);
  for (size_t pass = 0; pass < pass_count; ++pass)
  {
    g_pipelines[pass + 1] = create_pipeline(g_pipeline_layout_SMAA, passes[pass].vertex, passes[pass].fragment);
  }
}

// Records the three SMAA passes (edge detection -> blend weight -> neighborhood blending)
// into the current frame's command buffer. Every pass draws one fullscreen triangle into its
// own target and samples the result of the previous pass through the shared descriptor set.
// Expects g_source_image to be in VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL on entry.
// NOTE(review): the SMAA pipelines declare viewport and scissor as dynamic state, but neither
// is set here — presumably they are set earlier in the same command buffer; confirm.
void post_processing()
{
  auto frame = g_frames[g_frame_index];

  // upload push constant and descriptor set
  // (done once up front: all three pipelines use the same pipeline layout)
  PushConstant pc
  {
    .smaa_rt_metrics = glm::vec4(1.f / g_swapchain_extent.width, 1.f / g_swapchain_extent.height, g_swapchain_extent.width, g_swapchain_extent.height),
  };
  vkCmdPushConstants(frame.cmd, g_pipeline_layout_SMAA, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(pc), &pc);
  vkCmdBindDescriptorSets(frame.cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g_pipeline_layout_SMAA, 0, 1, &g_descriptor_set, 0, nullptr);

  // edge detection
  // the target's old contents are discarded (UNDEFINED); the source becomes readable by the shader
  transform_image_layout(frame.cmd, g_edges_image.handle, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
  transform_image_layout(frame.cmd, g_source_image.handle, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
  VkRenderingAttachmentInfo color_attachment
  {
    .sType       = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
    .imageView   = g_edges_image.view,
    .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    .loadOp      = VK_ATTACHMENT_LOAD_OP_CLEAR,
    .storeOp     = VK_ATTACHMENT_STORE_OP_STORE,
    .clearValue  = { .color = { 0.f, 0.f, 0.f, 0.f } },
  };
  // `rendering` points at `color_attachment`, so the later passes retarget it by only
  // swapping color_attachment.imageView
  VkRenderingInfo rendering
  {
    .sType                = VK_STRUCTURE_TYPE_RENDERING_INFO,
    .renderArea           = { {}, g_swapchain_extent },
    .layerCount           = 1,
    .colorAttachmentCount = 1,
    .pColorAttachments    = &color_attachment,
  };
  vkCmdBeginRendering(frame.cmd, &rendering);
  vkCmdBindPipeline(frame.cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g_pipelines[1]);
  vkCmdDraw(frame.cmd, 3, 1, 0, 0);
  vkCmdEndRendering(frame.cmd);

  // blend weight
  // the edges image switches from render target to sampled input
  transform_image_layout(frame.cmd, g_blend_image.handle, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
  transform_image_layout(frame.cmd, g_edges_image.handle, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
  color_attachment.imageView = g_blend_image.view;
  vkCmdBeginRendering(frame.cmd, &rendering);
  vkCmdBindPipeline(frame.cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g_pipelines[2]);
  vkCmdDraw(frame.cmd, 3, 1, 0, 0);
  vkCmdEndRendering(frame.cmd);

  // neighbor
  // the blend image switches from render target to sampled input; the source image is still readable
  transform_image_layout(frame.cmd, g_output_image.handle, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
  transform_image_layout(frame.cmd, g_blend_image.handle, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
  color_attachment.imageView = g_output_image.view;
  vkCmdBeginRendering(frame.cmd, &rendering);
  vkCmdBindPipeline(frame.cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, g_pipelines[3]);
  vkCmdDraw(frame.cmd, 3, 1, 0, 0);
  vkCmdEndRendering(frame.cmd);
}

ToolChain info

cmake version 3.30.0

// output of 'glslc --version'
shaderc v2023.8 v2025.1
spirv-tools v2025.1 v2022.4-695-gf289d047
glslang 11.1.0-1194-g8b822ee8

Target: SPIR-V 1.0

// os and other
Windows 10.0.19045, architecture 64bit
Compiler clang 18.1.8
vulkan version 1.4.309.

External libraries

https://github.com/libsdl-org/SDL release-3.2.12
https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git v3.2.1
https://github.com/iryoku/smaa.git master

All textures can be inspected in RenderDoc.


EDIT: I found one issue — the edge texture was flipped because the y axis points down in Vulkan. I fixed it by adding #define SMAA_FLIP_Y 0. However, the blend texture still looks strange, and the final output image is not correct either.

EDIT: OK, the edge texture problem was caused by SMAA_FLIP_Y. The blend weight output looked unclear compared with smaaDemo's because I was using a UNORM format rather than an sRGB one; with the latter, the blendTex is clear.

Well, I think the problem is resolved. A remaining issue is that SMAA (and also MSAA) does not seem to anti-alias wireframe shapes well: for example, a 1-pixel-wide wireframe triangle looks worse than with ImGui's anti-aliasing. Maybe the problem is in how I am using it.


Solution

  • OK, I think I found the problem.

    The first problem was that the edge detection was wrong. I use Vulkan, whose default coordinate system has the y axis flipped, so I need `#define SMAA_FLIP_Y 0` for SMAA to process the image correctly.

    The second problem was that the blend weights were not clear. That was because I used a UNORM color format rather than an sRGB format; using the latter makes the blend output clear. I do not think this affects the final output image, though.