swift macos sfspeechrecognizer dispatchgroup

In Swift, how do you loop through a list and hand one item at a time to a function with a completion closure?


I'm trying to process a folder of audio files through speech-to-text recognition on macOS.

If I process just one file, it works, but if I feed it multiple files, only one file works and the rest fail with an error.

I thought I could use DispatchGroup, but it still feeds everything at once instead of waiting for each item to be completed.

Could someone help me to understand what I'm doing wrong?

// First attempt from the question: start one recognition task per audio file
// and use a DispatchGroup to try to finish each file before starting the next.
let recognizer = SFSpeechRecognizer()
recognizer?.supportsOnDeviceRecognition = true
let group = DispatchGroup()
let fd = FileManager.default
fd.enumerator(at: url, includingPropertiesForKeys: nil)?.forEach({ (e) in
    // Only entries that are URLs with a "wav" or "aiff" extension are processed.
    if let url = e as? URL, url.pathExtension == "wav" || url.pathExtension == "aiff" {
        let request = SFSpeechURLRecognitionRequest(url: url)
        // One enter() per file; both exit paths of the handler below call leave().
        // NOTE(review): if the handler runs more than once for a task, leave() would
        // be called more often than enter() — confirm against the Speech docs.
        group.enter()
        let task =  recognizer?.recognitionTask(with: request) { (result, error) in
            print("Transcribing \(url.lastPathComponent)")
            // No result: report it and release the group for this file.
            guard let result = result else {
                print("\(url.lastPathComponent): No message")
                group.leave()
                return
            }
            // NOTE(review): `result` is a constant within this handler invocation and
            // nothing in the loop replaces it, so the loop only exits if `isFinal`
            // changes on this same object. Later results presumably arrive as new
            // handler invocations instead — confirm.
            while  result.isFinal == false {
                sleep(1)
            }
            print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
            group.leave()
        }
        // Blocks the enumerating thread until the group's enter/leave calls balance.
        // NOTE(review): if the handler is delivered on the thread blocked here, it can
        // never run — confirm which queue the recognizer calls back on.
        group.wait()
    }
}
// NOTE(review): the `forEach(` call opened above is closed with `}` only; the
// matching `)` is missing as written.
group.notify(queue: .main) {
    print("Done")
}

Update: I tried DispatchQueue, but it transcribes only one file and hangs.

// Second attempt from the question: wrap the start of each recognition task in
// a synchronous dispatch onto a private queue.
let recognizer = SFSpeechRecognizer()
recognizer?.supportsOnDeviceRecognition = true
let fd = FileManager.default
let q = DispatchQueue(label: "serial q")
fd.enumerator(at: url, includingPropertiesForKeys: nil)?.forEach({ (e) in
    // Only entries that are URLs with a "wav" extension are processed here.
    if let url = e as? URL, url.pathExtension == "wav" {
        let request = SFSpeechURLRecognitionRequest(url: url)
        // The closure passed to sync only calls recognitionTask(...) and hands it a
        // handler; nothing in the closure waits for that handler.
        // NOTE(review): so sync presumably returns once the task is started, not
        // once the file is transcribed — confirm.
        q.sync {
            let task =  recognizer?.recognitionTask(with: request) { (result, error) in
                // No result: report it and stop handling this callback.
                guard let result = result else {
                    print("\(url.lastPathComponent): No message")
                    return
                }
                // Non-final results are ignored; only the final transcription is printed.
                if result.isFinal {
                    print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
                }
            }
        }
    }
})
// Printed as soon as the enumeration above returns; nothing here waits for the
// recognition handlers.
print("Done")

Solution

  • This is an async/await solution with a Continuation. It runs sequentially.

    // Create the recognizer once; it is reused for every file.
    let recognizer = SFSpeechRecognizer()
    recognizer?.supportsOnDeviceRecognition = true

    // Lower-cased file extensions that are handed to the recognizer.
    let audioExtensions: Set<String> = ["wav", "aiff"]

    // Collect the matching files synchronously, before entering the Task.
    // Iterating a directory enumerator with `for … in` inside an asynchronous
    // context is rejected by newer toolchains, and collecting up front also
    // avoids capturing the enumerator across the task boundary.
    // A nil enumerator (directory could not be enumerated) yields an empty list
    // instead of crashing on a force unwrap.
    let fd = FileManager.default
    let enumerator = fd.enumerator(at: url, includingPropertiesForKeys: nil, options: .skipsHiddenFiles)
    let audioFiles = (enumerator?.allObjects ?? [])
        .compactMap { $0 as? URL }
        .filter { audioExtensions.contains($0.pathExtension.lowercased()) }

    Task {
        // Awaiting each call before starting the next keeps recognition sequential.
        for fileURL in audioFiles {
            do {
                try await recognizeText(at: fileURL)
            } catch {
                // A failure on one file is reported; the loop continues with the next.
                print(error)
            }
        }
    }
    
    
    /// Transcribes the audio file at `url`, printing each transcription the
    /// recognizer reports, and returns once the final result has arrived.
    ///
    /// - Parameter url: Location of the audio file to transcribe.
    /// - Throws: The error reported by the recognition task, or
    ///   `CocoaError(.featureUnsupported)` when no speech recognizer could be
    ///   created.
    func recognizeText(at url: URL) async throws {
        // Without a recognizer the handler would never be called and the
        // continuation below would never resume, hanging the caller forever.
        guard let recognizer = recognizer else {
            throw CocoaError(.featureUnsupported)
        }

        try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
            let request = SFSpeechURLRecognitionRequest(url: url)

            // A checked continuation must be resumed exactly once; this flag drops
            // any callback that arrives after the task has already finished.
            // NOTE(review): assumes the handler is invoked serially on the
            // recognizer's callback queue — confirm before relying on it.
            var didResume = false

            _ = recognizer.recognitionTask(with: request) { (result, error) in
                guard !didResume else { return }
                print("Transcribing \(url.lastPathComponent)")

                if let error = error {
                    print("\(url.lastPathComponent): No message")
                    didResume = true
                    continuation.resume(throwing: error)
                    return
                }

                // Neither a result nor an error: nothing to report for this callback.
                guard let result = result else { return }

                print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
                if result.isFinal {
                    didResume = true
                    continuation.resume(returning: ())
                }
            }
        }
    }