In Swift for iOS, I have an array of AVURLAsset. I pass the array through a function that stitches/merges the video assets together into one final video. For each video, my goal is to overlay text centered in the video frame.
When I play the output video, the assets are merged correctly, but no text overlay appears on any of them. I tried following an existing answer, but had no success. I also tried overlaying text on a single video by following Ray Wenderlich's tutorial, but for some reason I get the same end result :(.
Any guidance would be extremely appreciated.
func merge(videos: [AVURLAsset], completion: @escaping (_ url: URL, _ asset: AVAssetExportSession) -> ()) {
    let videoComposition = AVMutableComposition()
    var lastTime: CMTime = .zero
    var maxVideoSize = CGSize.zero

    guard let videoCompositionTrack = videoComposition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)),
          let audioCompositionTrack = videoComposition.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else { return }

    let mainComposition = AVMutableVideoComposition()
    let mainParentLayer = CALayer()
    let mainVideoLayer = CALayer()
    mainParentLayer.frame = CGRect(x: 0, y: 0, width: maxVideoSize.width, height: maxVideoSize.height)
    mainVideoLayer.frame = CGRect(x: 0, y: 0, width: maxVideoSize.width, height: maxVideoSize.height)
    mainParentLayer.addSublayer(mainVideoLayer)

    var instructions = [AVMutableVideoCompositionInstruction]()

    print("Main Parent Layer Frame: \(mainParentLayer.frame)")

    for video in videos {
        if let videoTrack = video.tracks(withMediaType: .video)[safe: 0] {
            videoCompositionTrack.preferredTransform = videoTrack.preferredTransform
            print("Video Track Transform: \(videoTrack.preferredTransform)")

            do {
                try videoCompositionTrack.insertTimeRange(CMTimeRangeMake(start: .zero, duration: video.duration), of: videoTrack, at: lastTime)
                print("Video time range: Start = \(lastTime.seconds), Duration = \(video.duration.seconds) [\(lastTime.seconds + video.duration.seconds > 0 ? "Right" : "Wrong")]")

                if let audioTrack = video.tracks(withMediaType: .audio)[safe: 0] {
                    try audioCompositionTrack.insertTimeRange(CMTimeRangeMake(start: .zero, duration: video.duration), of: audioTrack, at: lastTime)
                }

                lastTime = CMTimeAdd(lastTime, video.duration)

                let videoSize = videoTrack.naturalSize.applying(videoTrack.preferredTransform)
                let videoRect = CGRect(x: 0, y: 0, width: abs(videoSize.width), height: abs(videoSize.height))
                maxVideoSize = CGSize(width: max(maxVideoSize.width, videoRect.width), height: max(maxVideoSize.height, videoRect.height))

                let textLayer = CATextLayer()
                textLayer.string = "TESTING TESTING TESTING"
                textLayer.foregroundColor = UIColor.white.cgColor
                textLayer.font = UIFont(name: "Helvetica-Bold", size: min(videoRect.height / 10, 100))
                textLayer.shadowOpacity = 0.5
                textLayer.alignmentMode = .center
                textLayer.contentsScale = UIScreen.main.scale
                textLayer.isWrapped = true

                let textHeight: CGFloat = min(videoRect.height / 10, 120)
                let textWidth: CGFloat = videoRect.width
                let xPos = (videoRect.width - textWidth) / 2
                let yPos = (videoRect.height - textHeight) / 2
                textLayer.frame = CGRect(x: xPos, y: yPos, width: textWidth, height: textHeight)
                textLayer.zPosition = 1

                print("Text Layer Frame: \(textLayer.frame) [\(textLayer.frame.width > 0 && textLayer.frame.height > 0 ? "Right" : "Wrong")]")

                let parentLayer = CALayer()
                parentLayer.backgroundColor = UIColor.green.cgColor // Temp background color for debugging
                parentLayer.frame = videoRect
                parentLayer.addSublayer(textLayer)

                print("Video Layer zPosition: \(mainVideoLayer.zPosition), Text Layer zPosition: \(textLayer.zPosition) [\(textLayer.zPosition > mainVideoLayer.zPosition ? "Right" : "Wrong")]")

                let videoCompositionInstruction = AVMutableVideoCompositionInstruction()
                videoCompositionInstruction.timeRange = CMTimeRangeMake(start: lastTime - video.duration, duration: video.duration)
                let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: videoTrack)
                videoCompositionInstruction.layerInstructions = [layerInstruction]
                instructions.append(videoCompositionInstruction)

                parentLayer.zPosition = 0
                mainParentLayer.addSublayer(parentLayer)

                print("Parent Layer Frame: \(parentLayer.frame), Background Color: \(parentLayer.backgroundColor.debugDescription)")
                print("Text Layer Frame: \(textLayer.frame)")
            } catch {
                print("Failed to insert track: \(error.localizedDescription)")
                return
            }
        }
    }

    mainParentLayer.frame = CGRect(x: 0, y: 0, width: maxVideoSize.width, height: maxVideoSize.height)
    mainVideoLayer.frame = mainParentLayer.frame
    mainComposition.renderSize = maxVideoSize
    mainComposition.instructions = instructions
    mainComposition.frameDuration = CMTime(value: 1, timescale: 30)
    mainComposition.animationTool = AVVideoCompositionCoreAnimationTool(postProcessingAsVideoLayer: mainVideoLayer, in: mainParentLayer)

    print("Final Main Parent Layer Frame: \(mainParentLayer.frame)")
    print("Number of Sublayers in Main Parent Layer: \(mainParentLayer.sublayers?.count ?? 0)")

    let outputUrl = NSURL.fileURL(withPath: NSTemporaryDirectory() + "merged" + ".mp4")

    print("Pre-Export Main Parent Layer Frame: \(mainParentLayer.frame)")
    print("Pre-Export Number of Sublayers in Main Parent Layer: \(mainParentLayer.sublayers?.count ?? 0)")
    if let sublayers = mainParentLayer.sublayers {
        for (index, layer) in sublayers.enumerated() {
            print("Layer \(index): \(layer), Frame: \(layer.frame), zPosition: \(layer.zPosition)")
        }
    }

    guard let exporter = AVAssetExportSession(asset: videoComposition, presetName: AVAssetExportPresetHighestQuality) else { return }
    exporter.videoComposition = mainComposition
    exporter.outputURL = outputUrl
    exporter.outputFileType = .mp4
    exporter.shouldOptimizeForNetworkUse = true

    if let videoComposition = exporter.videoComposition {
        print("Export Video Composition Render Size: \(videoComposition.renderSize)")
        print("Export Video Composition Frame Duration: \(videoComposition.frameDuration)")
        print("Export Video Composition Instructions Count: \(videoComposition.instructions.count)")
    }

    exporter.exportAsynchronously {
        DispatchQueue.main.async {
            if let outputUrl = exporter.outputURL, exporter.status == .completed {
                completion(outputUrl, exporter)
            } else if let error = exporter.error {
                print("Export failed: \(error.localizedDescription)")
            }
        }
    }

    play(video: exporter.asset)
}
I ran your code and it seems to be exporting the text as well; however, there is one thing I noticed. I believe you want to play the exported video, not the constructed asset, which has no text attached to it. If that last line is what you actually run and you play the exporter's asset, that is the issue. Create a new asset from the exported path and play that one instead, once the export is finished (from your completion block). Replace
play(video: exporter.asset)
// with
play(video: AVAsset(url: outputUrl))
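For example, a minimal sketch of what the call site could look like when the exported file is played from the completion block; the assets array, the presenting view controller, and the use of AVPlayerViewController are assumptions for illustration, not your exact code:

import AVKit

// Call from a view controller; play the *exported* file, not exporter.asset.
merge(videos: assets) { outputUrl, _ in
    let player = AVPlayer(playerItem: AVPlayerItem(asset: AVAsset(url: outputUrl)))
    let playerViewController = AVPlayerViewController()
    playerViewController.player = player
    self.present(playerViewController, animated: true) {
        player.play()
    }
}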
In addition, if you want to play the video with its overlay without exporting, there is an API for that as well: AVSynchronizedLayer (see the sketch below). Hope this helps.
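A rough sketch of that approach, assuming you keep your composition and a layer holding the text; the function name, parameters, and the playback video composition (built without the animationTool, since the Core Animation tool is intended for export) are placeholders:

import AVFoundation
import UIKit

// Preview the text overlay during playback instead of exporting.
// `composition` is the AVMutableComposition, `playbackVideoComposition` is a
// video composition WITHOUT the animationTool, and `overlayLayer` is a CALayer
// containing the CATextLayers.
func preview(in viewController: UIViewController,
             composition: AVMutableComposition,
             playbackVideoComposition: AVVideoComposition,
             overlayLayer: CALayer) {
    let playerItem = AVPlayerItem(asset: composition)
    playerItem.videoComposition = playbackVideoComposition

    let player = AVPlayer(playerItem: playerItem)
    let playerLayer = AVPlayerLayer(player: player)
    playerLayer.frame = viewController.view.bounds
    viewController.view.layer.addSublayer(playerLayer)

    // AVSynchronizedLayer ties its sublayers' Core Animation timing to the
    // player item's timeline, so the overlay stays in sync with the video.
    let syncLayer = AVSynchronizedLayer(playerItem: playerItem)
    syncLayer.frame = viewController.view.bounds
    syncLayer.addSublayer(overlayLayer)
    viewController.view.layer.addSublayer(syncLayer)

    player.play()
}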