I'm trying to process a folder of audio files through speech-to-text recognition on macOS.
If I process just one file, it works, but if I feed it multiple files, only one file works and the rest throw an error.
I thought I could use DispatchGroup, but it still feeds everything at once instead of waiting for each item to be completed.
Could someone help me to understand what I'm doing wrong?
// First attempt: walk the directory at `url` and start one recognition task per
// .wav/.aiff file, using a DispatchGroup to try to finish each file before
// starting the next one.
let recognizer = SFSpeechRecognizer()
recognizer?.supportsOnDeviceRecognition = true
let group = DispatchGroup()
let fd = FileManager.default
fd.enumerator(at: url, includingPropertiesForKeys: nil)?.forEach({ (e) in
if let url = e as? URL, url.pathExtension == "wav" || url.pathExtension == "aiff" {
let request = SFSpeechURLRecognitionRequest(url: url)
// One enter() per file; the handler below is expected to balance it with leave().
// NOTE(review): if `recognizer` is nil the optional-chained call below does nothing,
// so leave() is never called for this enter().
group.enter()
let task = recognizer?.recognitionTask(with: request) { (result, error) in
print("Transcribing \(url.lastPathComponent)")
// No result: report it and release the group for this file.
guard let result = result else {
print("\(url.lastPathComponent): No message")
group.leave()
return
}
// NOTE(review): nothing inside this loop refreshes `result`, so if isFinal is
// false on entry the loop presumably never exits — it only sleeps and blocks
// whatever thread delivers the handler. Confirm against the Speech API.
while result.isFinal == false {
sleep(1)
}
print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
// NOTE(review): if the handler is invoked more than once per task (e.g. for
// partial results — confirm), leave() runs more often than enter().
group.leave()
}
// Blocks the current thread until every enter() has been matched by a leave().
// NOTE(review): if the handler is delivered on this same thread/queue, this
// wait can never be satisfied — confirm which queue the recognizer calls back on.
group.wait()
}
// NOTE(review): the `forEach({` call opened above is closed with `}` only; the
// matching `)` appears to be missing.
}
// Runs on the main queue once the group is empty.
group.notify(queue: .main) {
print("Done")
}
Update: I tried DispatchQueue, but it transcribes only one file and hangs.
// Second attempt: same directory walk (.wav only), but each recognition task is
// started from inside `q.sync` on a private serial queue.
let recognizer = SFSpeechRecognizer()
recognizer?.supportsOnDeviceRecognition = true
let fd = FileManager.default
let q = DispatchQueue(label: "serial q")
fd.enumerator(at: url, includingPropertiesForKeys: nil)?.forEach({ (e) in
if let url = e as? URL, url.pathExtension == "wav" {
let request = SFSpeechURLRecognitionRequest(url: url)
// q.sync only covers the call that *starts* the task; the result handler is a
// separate closure handed to the recognizer.
// NOTE(review): presumably recognitionTask(with:) returns before the handler
// fires, so the serial queue does not make the files wait for each other — confirm.
q.sync {
let task = recognizer?.recognitionTask(with: request) { (result, error) in
// No result: report it; `error` is not inspected here.
guard let result = result else {
print("\(url.lastPathComponent): No message")
return
}
// Only the final result is printed; non-final results are ignored.
if result.isFinal {
print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
}
}
}
}
})
// NOTE(review): this is reached as soon as the enumeration finishes, which is
// presumably before the recognition handlers have run — confirm.
print("Done")
This is an async/await solution with a Continuation. It runs sequentially.
// Driver: transcribe every .wav/.aiff file below `url`, strictly one after another.
let recognizer = SFSpeechRecognizer()
recognizer?.supportsOnDeviceRecognition = true
let fd = FileManager.default

// Extensions are compared lowercased so that e.g. "TAKE1.WAV" is picked up too.
let audioExtensions: Set<String> = ["wav", "aiff"]

// `enumerator(at:...)` returns nil when the directory cannot be read; report that
// instead of crashing on a force-unwrap.
let enumerator = fd.enumerator(at: url, includingPropertiesForKeys: nil, options: .skipsHiddenFiles)
if enumerator == nil {
    print("Could not enumerate \(url.path)")
}

// Collect the matching files up front so that only a plain [URL] value (and not
// the directory enumerator object itself) is captured by the Task below.
let audioFiles: [URL] = (enumerator?.compactMap { $0 as? URL } ?? [])
    .filter { audioExtensions.contains($0.pathExtension.lowercased()) }

Task {
    // Each `await` finishes before the next file is started, which is what makes
    // the processing sequential.
    for fileURL in audioFiles {
        do {
            try await recognizeText(at: fileURL)
        } catch {
            // Keep going with the remaining files, but say which one failed.
            print("\(fileURL.lastPathComponent): \(error)")
        }
    }
}
/// Transcribes the audio file at `url` and suspends until recognition has finished.
///
/// Every transcription delivered by the recognizer (partial or final) is printed,
/// prefixed with the file name. The function returns once a final result arrives.
///
/// - Parameter url: Location of the audio file to transcribe.
/// - Throws: The error reported by the recognition task, or
///   `CocoaError(.featureUnsupported)` when no speech recognizer is available.
func recognizeText(at url: URL) async throws {
    // Without a recognizer the optional-chained call would silently do nothing and
    // the continuation would never be resumed, hanging the caller forever.
    guard let recognizer = recognizer else {
        throw CocoaError(.featureUnsupported)
    }

    try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
        let request = SFSpeechURLRecognitionRequest(url: url)

        // A checked continuation must be resumed exactly once. The handler can be
        // invoked several times, so remember whether we are already done.
        // NOTE(review): assumes the handler invocations are serialised on the
        // recognizer's callback queue — confirm.
        var didResume = false

        _ = recognizer.recognitionTask(with: request) { (result, error) in
            guard !didResume else { return }
            print("Transcribing \(url.lastPathComponent)")

            if let error = error {
                print("\(url.lastPathComponent): No message")
                didResume = true
                continuation.resume(throwing: error)
                return
            }

            // Neither an error nor a result: nothing to report yet, wait for the
            // next callback instead of force-unwrapping.
            guard let result = result else { return }

            print("\(url.lastPathComponent): \(result.bestTranscription.formattedString)")
            if result.isFinal {
                didResume = true
                continuation.resume(returning: ())
            }
        }
    }
}