python pyqt5 wav watson-text-to-speech

Edit wav file headers for use with QSound/pyqt5 from within python (Watson Text To Speech TTS)


QSound from PyQt5 has been giving me trouble: some wav files play fine, while others cause the Qt app to error out and not run. After some research, I have narrowed the culprit down to the headers of the wav files.

If I open the wav file in Audacity and export it as a wav file again, the exported file works perfectly. However, I need a solution that runs from within my Python script.

I am getting my wav files from Watson's Text to Speech API, and I am not sure whether I can control which headers it includes.

import sys
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow
from PyQt5.QtCore import Qt
from PyQt5.QtMultimedia import QSound

from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator


def list_to_speech(text, language='ja-JP_EmiV3Voice',
                   output_path='text_to_speech.wav'):
    """Synthesize *text* with Watson Text to Speech and save it as a WAV file.

    Args:
        text: The text to be spoken.
        language: Voice name passed as ``voice`` to ``synthesize``.
        output_path: Destination of the WAV bytes. Defaults to
            ``text_to_speech.wav``, the file name used before this
            parameter existed.

    Returns:
        None. The audio is written to ``output_path``.
    """
    api_key = "my_api_key"
    url = "url"

    # Authenticate, build the TTS client and point it at the service URL.
    authenticator = IAMAuthenticator(api_key)
    tts = TextToSpeechV1(authenticator=authenticator)
    tts.set_service_url(url)

    # Fetch the audio BEFORE opening the destination: opening with 'wb'
    # truncates the file, so a failed request would otherwise leave an empty
    # (or clobbered) WAV file behind.
    res = tts.synthesize(text, accept='audio/wav', voice=language).get_result()
    with open(output_path, 'wb') as audio_file:
        audio_file.write(res.content)


class MainWindow(QMainWindow):
    """Main window that starts QSound playback of the WAV file on creation."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to QMainWindow, start the sound, add a label."""
        super().__init__(*args, **kwargs)

        # The QSound object is stored on the instance and played right away.
        self.sound = QSound("text_to_speech.wav")
        self.sound.play()

        # A centered caption is the window's only content.
        caption = QLabel("This PyQt5 window will (try to) play the wav file!")
        caption.setAlignment(Qt.AlignCenter)
        self.setCentralWidget(caption)


if __name__ == "__main__":
    # The file saved by list_to_speech won't play via
    # QSound("text_to_speech.wav").play(); instead the app crashes before
    # the window opens.
    #
    # If text_to_speech.wav is opened in Audacity and re-exported with empty
    # headers, and the next line is then commented out, playback works.
    list_to_speech("ありがとう")
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    # Hand the event loop's return code back to the shell as the exit status
    # instead of discarding it.
    sys.exit(app.exec_())

Solution

  • A possible solution is to use QMediaPlayer instead of QSound, since it can handle other codecs:

    import os
    import sys
    
    from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow
    from PyQt5.QtCore import Qt, QUrl
    from PyQt5.QtMultimedia import QMediaPlayer, QMediaContent
    
    # Directory containing this script (real path, symlinks resolved).
    CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
    
    # ...
    
    class MainWindow(QMainWindow):
        """Main window that plays the WAV file through QMediaPlayer on creation."""

        def __init__(self, *args, **kwargs):
            """Forward all arguments to QMainWindow, start playback, add a label."""
            super().__init__(*args, **kwargs)

            # Locate the WAV file next to this script and wrap it as media content.
            wav_path = os.path.join(CURRENT_DIR, "text_to_speech.wav")
            media = QMediaContent(QUrl.fromLocalFile(wav_path))

            # The player is stored on the instance, loaded, and started right away.
            self.player = QMediaPlayer()
            self.player.setMedia(media)
            self.player.play()

            # A centered caption is the window's only content.
            caption = QLabel("This PyQt5 window will (try to) play the wav file!")
            caption.setAlignment(Qt.AlignCenter)
            self.setCentralWidget(caption)
    
    # ...
    

    Note: Another option is to use a different audio format, such as MP3.