I am working on an ARKit project where I use ARView (or ARSCNView) to place nodes in the real world. I want to apply a black-and-white filter to the camera feed background while keeping my 3D node objects colored. I heard this can be done using Metal shaders or custom rendering, but I'm unsure how to implement it.
Could someone guide me on how to achieve this effect, or share a working demo or code snippet? I am using Swift for development. Any help would be appreciated.
Here's a basic setup of my ARSCNView with node placement:
import UIKit
import ARKit

class ViewController: UIViewController, ARSCNViewDelegate {

    @IBOutlet var sceneView: ARSCNView!

    override func viewDidLoad() {
        super.viewDidLoad()
        let configuration = ARWorldTrackingConfiguration()
        sceneView.session.run(configuration)
        sceneView.delegate = self

        let tapGesture = UITapGestureRecognizer(target: self, action: #selector(handleTap(_:)))
        sceneView.addGestureRecognizer(tapGesture)
    }

    @objc func handleTap(_ gesture: UITapGestureRecognizer) {
        let location = gesture.location(in: sceneView)
        let hitResults = sceneView.hitTest(location, types: .featurePoint)
        if let hitResult = hitResults.first {
            let sphere = SCNSphere(radius: 0.05)
            sphere.firstMaterial?.diffuse.contents = UIColor.blue
            let node = SCNNode(geometry: sphere)
            node.position = SCNVector3(hitResult.worldTransform.columns.3.x,
                                       hitResult.worldTransform.columns.3.y,
                                       hitResult.worldTransform.columns.3.z)
            sceneView.scene.rootNode.addChildNode(node)
        }
    }
}
What I've Tried
I looked into custom fragment shaders with Metal, but I'm not sure how to apply one only to the camera feed while keeping the nodes unaffected. Most examples affect the entire view.
This is the SceneFilterTechnique.metal file I used to create the grayscale effect:
#include <metal_stdlib>
using namespace metal;

typedef struct {
    float4 renderedCoordinate [[position]];
    float2 textureCoordinate;
} TextureMappingVertex;

vertex TextureMappingVertex mapTexture(unsigned int vertex_id [[ vertex_id ]]) {
    float4x4 renderedCoordinates = float4x4(float4( -1.0, -1.0, 0.0, 1.0 ),
                                            float4(  1.0, -1.0, 0.0, 1.0 ),
                                            float4( -1.0,  1.0, 0.0, 1.0 ),
                                            float4(  1.0,  1.0, 0.0, 1.0 ));
    float4x2 textureCoordinates = float4x2(float2( 0.0, 1.0 ),
                                           float2( 1.0, 1.0 ),
                                           float2( 0.0, 0.0 ),
                                           float2( 1.0, 0.0 ));
    TextureMappingVertex outVertex;
    outVertex.renderedCoordinate = renderedCoordinates[vertex_id];
    outVertex.textureCoordinate = textureCoordinates[vertex_id];
    return outVertex;
}

fragment half4 displayTexture(TextureMappingVertex mappingVertex [[ stage_in ]],
                              texture2d<float, access::sample> texture [[ texture(0) ]]) {
    constexpr sampler s(address::clamp_to_edge, filter::linear);
    float4 color = texture.sample(s, mappingVertex.textureCoordinate);
    float grayscale = (color.r + color.g + color.b) / 3.0;
    return half4(grayscale, grayscale, grayscale, color.a);
}
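(Editor's aside, a hedged alternative: the plain average above weights all channels equally, which can look flat. A Rec. 601 luminance-weighted fragment would drop in as follows, reusing the same vertex stage and texture binding:)

fragment half4 displayTextureLuma(TextureMappingVertex mappingVertex [[ stage_in ]],
                                  texture2d<float, access::sample> texture [[ texture(0) ]]) {
    constexpr sampler s(address::clamp_to_edge, filter::linear);
    float4 color = texture.sample(s, mappingVertex.textureCoordinate);
    // Rec. 601 weights track perceived brightness better than (r+g+b)/3
    float grayscale = dot(color.rgb, float3(0.299, 0.587, 0.114));
    return half4(grayscale, grayscale, grayscale, color.a);
}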
When I assign this technique with sceneView.technique = filterTechnique, the view just shows a white screen.
This is the property list file, with the same name as the Metal file:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>sequence</key>
    <array>
        <string>apply_filter</string>
    </array>
    <key>passes</key>
    <dict>
        <key>apply_filter</key>
        <dict>
            <key>metalVertexShader</key>
            <string>mapTexture</string>
            <key>metalFragmentShader</key>
            <string>displayTexture</string>
            <key>draw</key>
            <string>DRAW_QUAD</string>
            <key>inputs</key>
            <dict>
                <key>scene</key>
                <string>COLOR</string>
            </dict>
            <key>outputs</key>
            <dict>
                <key>color</key>
                <string>COLOR</string>
            </dict>
        </dict>
    </dict>
</dict>
</plist>
I have a function to create the technique:
private func makeTechnique(fromPlistNamed plistName: String) -> SCNTechnique {
    guard let url = Bundle.main.url(forResource: plistName, withExtension: "plist") else {
        fatalError("\(plistName).plist does not exist in the main bundle")
    }
    guard let dictionary = NSDictionary(contentsOf: url) as? [String: Any] else {
        fatalError("Failed to parse \(plistName).plist as a dictionary")
    }
    guard let technique = SCNTechnique(dictionary: dictionary) else {
        fatalError("Failed to initialize a technique using \(plistName).plist")
    }
    return technique
}
and I am then adding that technique to the sceneView as follows:
let filterTechnique = makeTechnique(fromPlistNamed: "SceneFilterTechnique")
sceneView.technique = filterTechnique
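As a sanity check for the white screen, it can help to confirm that the shader functions were actually compiled into the app's default Metal library. A hedged debugging sketch (run once, e.g. from viewDidLoad; requires import Metal):

// The printed names should include "mapTexture" and "displayTexture" from the plist above
if let library = MTLCreateSystemDefaultDevice()?.makeDefaultLibrary() {
    print(library.functionNames)
}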
I attached the output image too.
I'll try to give an answer here. Displaying the AR image in grayscale is more complicated than one might think. After several attempts, I came up with the following solution; I hope it helps you achieve what you want.
I took an out-of-the-box ARKit/SceneKit/Swift project (the one with the 3D spaceship flying in space and the color AR feed), removed the ship, and here is how you can implement the solution:
ViewController.swift
import UIKit
import SceneKit
import ARKit
import MetalKit

struct Uniforms {
    var scaleX: Float
    var scaleY: Float
}
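// Note (editor): this Swift struct must match the `Uniforms` struct in shaders.metal
// exactly (two 32-bit floats, 8-byte stride); a layout mismatch corrupts the scale
// factors read by the vertex shader.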
class ViewController: UIViewController, ARSCNViewDelegate {

    @IBOutlet var sceneView: ARSCNView!

    var metalView: MTKView!
    var commandQueue: MTLCommandQueue!
    var device: MTLDevice!
    var grayscalePipelineState: MTLRenderPipelineState!
    var uniforms = Uniforms(scaleX: 1.0, scaleY: 1.0) // to pass to the shader

    private var textureCache: CVMetalTextureCache!
    private var grayscaleTexture: MTLTexture?
    private var transformBuffer: MTLBuffer?

    override func viewDidLoad() {
        super.viewDidLoad()

        // ARKit Setup
        let configuration = ARWorldTrackingConfiguration()
        sceneView.session.run(configuration)
        sceneView.delegate = self
        sceneView.scene = SCNScene()

        // Turn off ARKit's color background feed
        sceneView.scene.background.contents = nil // UIColor.clear
        sceneView.backgroundColor = UIColor.clear
        sceneView.automaticallyUpdatesLighting = false
        sceneView.layer.isOpaque = false

        // Metal Device / Command Queue
        device = MTLCreateSystemDefaultDevice()
        commandQueue = device.makeCommandQueue()

        // MTKView
        metalView = MTKView(frame: view.bounds, device: device)
        metalView.framebufferOnly = false
        metalView.delegate = self
        metalView.isOpaque = false
        // Put it behind the SceneView so the 3D geometry is on top
        view.insertSubview(metalView, belowSubview: sceneView)

        // Build pipeline
        self.setupMetalPipeline()

        // Create a single CVMetalTextureCache
        CVMetalTextureCacheCreate(
            kCFAllocatorDefault,
            nil,
            device,
            nil,
            &textureCache
        )

        createOffscreenTexture()

        // add your gesture for placing nodes
        let tapGesture = UITapGestureRecognizer(target: self, action: #selector(handleTap(_:)))
        sceneView.addGestureRecognizer(tapGesture)
    }
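    // Note (editor): draw(in:) below never presents the MTKView's own drawable;
    // the view mainly drives the per-frame render loop, and the visible grayscale
    // image reaches the screen via sceneView.scene.background.contents.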
    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        // Pause the view's session
        sceneView.session.pause()
    }

    func setupMetalPipeline() {
        guard let library = device.makeDefaultLibrary() else { return }
        let desc = MTLRenderPipelineDescriptor()
        desc.vertexFunction = library.makeFunction(name: "aspectFitVertex")
        desc.fragmentFunction = library.makeFunction(name: "grayscaleFragment")
        desc.colorAttachments[0].pixelFormat = metalView.colorPixelFormat
        do {
            grayscalePipelineState = try device.makeRenderPipelineState(descriptor: desc)
        } catch {
            fatalError("Error creating pipeline state: \(error)")
        }
    }
    @objc func handleTap(_ gesture: UITapGestureRecognizer) {
        let location = gesture.location(in: sceneView)

        // Create a raycast query
        guard let query = sceneView.raycastQuery(from: location,
                                                 allowing: .estimatedPlane,
                                                 alignment: .any) else {
            return
        }

        // Perform the raycast
        let results = sceneView.session.raycast(query)
        if let result = results.first {
            let sphere = SCNSphere(radius: 0.05)
            sphere.firstMaterial?.diffuse.contents = UIColor.blue
            let node = SCNNode(geometry: sphere)
            node.position = SCNVector3(result.worldTransform.columns.3.x,
                                       result.worldTransform.columns.3.y,
                                       result.worldTransform.columns.3.z)
            sceneView.scene.rootNode.addChildNode(node)
        }
    }
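    // Note (editor): this raycast replaces the deprecated ARSCNView.hitTest(_:types:)
    // used in the question (raycasting requires iOS 13+).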
    // Converts ARKit's Y-plane to an MTLTexture with .r8Unorm format
    func makeYTexture(from pixelBuffer: CVPixelBuffer) -> MTLTexture? {
        let width = CVPixelBufferGetWidthOfPlane(pixelBuffer, 0)
        let height = CVPixelBufferGetHeightOfPlane(pixelBuffer, 0)

        var cvMetalTexture: CVMetalTexture?
        let status = CVMetalTextureCacheCreateTextureFromImage(
            kCFAllocatorDefault,
            textureCache,
            pixelBuffer,
            nil,
            .r8Unorm,
            width,
            height,
            0, // plane index = 0 => Luma
            &cvMetalTexture
        )
        guard status == kCVReturnSuccess, let cvMetalTexture = cvMetalTexture else {
            return nil
        }
        return CVMetalTextureGetTexture(cvMetalTexture)
    }
    // MARK: - ARSCNViewDelegate

    func session(_ session: ARSession, didFailWithError error: Error) {
        // Present an error message to the user
    }

    func sessionWasInterrupted(_ session: ARSession) {
        // Inform the user that the session has been interrupted, for example, by presenting an overlay
    }

    func sessionInterruptionEnded(_ session: ARSession) {
        // Reset tracking and/or remove existing anchors if consistent tracking is required
    }

    func createOffscreenTexture() {
        let descriptor = MTLTextureDescriptor.texture2DDescriptor(
            pixelFormat: .bgra8Unorm,
            width: Int(sceneView.bounds.width * UIScreen.main.scale),
            height: Int(sceneView.bounds.height * UIScreen.main.scale),
            mipmapped: false
        )
        descriptor.usage = [.renderTarget, .shaderRead]
        descriptor.storageMode = .private
        grayscaleTexture = device.makeTexture(descriptor: descriptor)
    }
}
extension ViewController: MTKViewDelegate {

    func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
        // adjust any viewport...
    }

    func draw(in view: MTKView) {
        guard let currentFrame = sceneView.session.currentFrame else { return }
        let pixelBuffer = currentFrame.capturedImage

        guard let yTexture = makeYTexture(from: pixelBuffer),
              let grayscaleTexture = grayscaleTexture else { return }

        // Create a command buffer
        guard let commandBuffer = commandQueue.makeCommandBuffer() else { return }

        // Create a render pass descriptor for the offscreen texture
        let rpd = MTLRenderPassDescriptor()
        rpd.colorAttachments[0].texture = grayscaleTexture
        rpd.colorAttachments[0].loadAction = .clear
        rpd.colorAttachments[0].clearColor = MTLClearColorMake(0, 0, 0, 1)
        rpd.colorAttachments[0].storeAction = .store

        // Compute aspect-fill scaling.
        // Because we rotate 90° in the fragment shader, swap W & H: <- IMPORTANT!
        let h = Float(CVPixelBufferGetWidthOfPlane(pixelBuffer, 0))
        let w = Float(CVPixelBufferGetHeightOfPlane(pixelBuffer, 0))
        let cameraAspect = w / h

        let screenW = Float(view.drawableSize.width)
        let screenH = Float(view.drawableSize.height)
        let screenAspect = screenW / screenH

        var scaleX: Float = 1.0
        var scaleY: Float = 1.0
        if screenAspect > cameraAspect {
            // Fit width, crop height
            scaleX = 1.0
            scaleY = screenAspect / cameraAspect
        } else {
            // Fit height, crop width
            scaleX = cameraAspect / screenAspect
            scaleY = 1.0
        }

        // Pass scaling factors to the vertex shader
        var uniforms = Uniforms(scaleX: scaleX, scaleY: scaleY)
        let uniformBuffer = device.makeBuffer(bytes: &uniforms, length: MemoryLayout<Uniforms>.stride, options: [])

        // Render the grayscale output
        let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: rpd)!
        encoder.setRenderPipelineState(grayscalePipelineState)
        encoder.setFragmentTexture(yTexture, index: 0)
        encoder.setVertexBuffer(uniformBuffer, offset: 0, index: 1)
        encoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
        encoder.endEncoding()

        // Commit the command buffer
        commandBuffer.commit()

        // Update the SceneKit background with the rendered texture
        DispatchQueue.main.async {
            self.sceneView.scene.background.contents = self.grayscaleTexture
        }
    }
}
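One small addition you may want (my own hedged sketch, not part of the original project): the MTKView is sized only once in viewDidLoad, so on rotation or other layout changes you would keep it and the offscreen texture in sync:

override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()
    // Resize the Metal view and rebuild the offscreen target at the new size
    metalView.frame = view.bounds
    createOffscreenTexture()
}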
Then create a shaders.metal file and paste the following content:
shaders.metal
#include <metal_stdlib>
using namespace metal;

struct QuadVertexIn {
    float2 uv [[ attribute(0) ]];
};

struct VertexOut {
    float4 position [[position]];
    float2 texCoord;
};

// Uniforms structure for scaling
struct Uniforms {
    float scaleX;
    float scaleY;
};

vertex VertexOut aspectFitVertex(uint vid [[vertex_id]],
                                 constant Uniforms &u [[buffer(1)]])
{
    // A standard full-screen quad in clip space:
    float4 positions[4] = {
        float4(-1,  1, 0, 1),
        float4(-1, -1, 0, 1),
        float4( 1,  1, 0, 1),
        float4( 1, -1, 0, 1)
    };
    // Texcoords typically go top-left => (0,0), bottom-right => (1,1)
    // Depending on orientation, you might flip Y or apply rotation
    float2 texCoords[4] = {
        float2(0, 0),
        float2(0, 1),
        float2(1, 0),
        float2(1, 1)
    };
    // Apply letterbox scale
    positions[vid].x *= u.scaleX;
    positions[vid].y *= u.scaleY;

    VertexOut out;
    out.position = positions[vid];
    out.texCoord = texCoords[vid];
    return out;
}

fragment float4 grayscaleFragment(VertexOut in [[stage_in]],
                                  texture2d<float> yTex [[texture(0)]]) {
    constexpr sampler s(address::clamp_to_edge, // We need that line twice (very recommended)
                        address::clamp_to_edge, // We need that line twice (very recommended)
                        mag_filter::linear,
                        min_filter::linear);
    // Rotate the texture coordinates 90° CW if necessary (that's because of how
    // the camera feed is handled on devices)
    float2 rotated = float2(in.texCoord.y, 1.0 - in.texCoord.x);
    // Sample the luminance (Y-plane)
    float luma = yTex.sample(s, rotated).r;

    // OPTIONAL: Darken the image
    float darkeningFactor = 0.7; // Adjust this for brightness (0.5..0.8)
    luma *= darkeningFactor;

    // OPTIONAL: Apply contrast adjustment
    float contrast = 1.3;  // Play with contrast
    float midpoint = 0.5;  // Pivot for adjustment
    luma = (luma - midpoint) * contrast + midpoint;

    // OPTIONAL: Clamp to valid range (0, 1)
    luma = clamp(luma, 0.0, 1.0);

    return float4(luma, luma, luma, 1.0);
}
Example Screenshot: