I'm developing a C++ application that uses the Microsoft Speech API (SAPI). I have written a number of functions related to Text-To-Speech, among them a function that lists the available audio outputs and a function that selects an audio output.
I started developing this program on Windows 7, but I have now switched to Windows 10. However, the function that selects the audio output no longer works. I didn't change anything in my code, and on Windows 7 it worked perfectly.
Here is the code which lists the available audio outputs
/**
 * Prints the description of the audio output remembered in saveAudio,
 * followed by a numbered list of every audio output SAPI enumerates.
 *
 * Works on state declared outside this block (hr, cpEnum, cpAudioOutToken,
 * dynStr, vCount, audioOut, saveAudio). hr is overwritten with the result of
 * SpEnumTokens; audioOut is reset to saveAudio before returning.
 *
 * @param auOut  Unused; kept so existing callers keep compiling.
 * @return Always true (non-zero), including when enumeration fails.
 */
int getAudioOut( int auOut ) //get audio outputs function
{
    (void)auOut;

    // hr holds the outcome of an earlier call made elsewhere; do nothing if it failed.
    if( !SUCCEEDED( hr ) )
    {
        printf( "Could not enumerate available audio outputs\n" );
        return true;
    }

    //Enumerate Audio Outputs
    hr = SpEnumTokens( SPCAT_AUDIOOUT, NULL, NULL, &cpEnum );
    if( !SUCCEEDED( hr ) )
    {
        // cpEnum was not filled in, so it must not be dereferenced.
        printf( "Could not enumerate available audio outputs\n" );
        return true;
    }

    if( SUCCEEDED( cpEnum->GetCount( &vCount ) ) )
    {
        // Describe the output currently remembered in saveAudio.
        if( SUCCEEDED( cpEnum->Item( saveAudio, &cpAudioOutToken ) ) &&
            SUCCEEDED( SpGetDescription( cpAudioOutToken, &dynStr ) ) )
        {
            printf( "Defined audio output is: %ls\n\n", dynStr );
        }
        dynStr.Clear();

        //Loop through the audio output list and enumerate them all.
        // "< vCount" instead of "<= vCount - 1": the latter wraps around
        // when the unsigned count is 0.
        for( audioOut = 0; audioOut < vCount; audioOut++ )
        {
            cpAudioOutToken.Release();
            if( SUCCEEDED( cpEnum->Item( audioOut, &cpAudioOutToken ) ) &&
                SUCCEEDED( SpGetDescription( cpAudioOutToken, &dynStr ) ) )
            {
                printf( "Defined Audio Output %i - %ls\n", audioOut, dynStr );
            }
            dynStr.Clear();
        }
        printf( "\n" );
        audioOut = saveAudio;
    }
    else
    {
        printf( "Could not enumerate available audio outputs\n" );
    }

    // Always drop the references so the next &cpEnum / &cpAudioOutToken starts empty.
    cpEnum.Release();
    cpAudioOutToken.Release();
    return true;
}
Here is the code which allows the definition of an audio output
/**
 * Selects the audio output with index auOut (position in the SAPI
 * SPCAT_AUDIOOUT enumeration) as the output of cpVoice.
 *
 * Works on state declared outside this block (hr, cpEnum, cpAudioOutToken,
 * cpVoice, dynStr, vCount, saveAudio). hr is overwritten with the result of
 * SpEnumTokens. saveAudio is updated only when the output was actually set.
 *
 * @param auOut  Zero-based index of the desired audio output. Negative values
 *               convert to a huge unsigned number and are rejected as out of range.
 * @return 0 when auOut is out of range, otherwise true (non-zero).
 */
int setAudioOut( int auOut ) //define audio output function
{
    // hr holds the outcome of an earlier call made elsewhere; do nothing if it failed.
    if( !SUCCEEDED( hr ) )
    {
        printf( "Could not set audio output\n" );
        return true;
    }

    hr = SpEnumTokens( SPCAT_AUDIOOUT, NULL, NULL, &cpEnum );
    if( !SUCCEEDED( hr ) )
    {
        // cpEnum was not filled in, so it must not be dereferenced.
        printf( "Could not set audio output\n" );
        return true;
    }

    vCount = 0; // stays 0 (everything out of range) if GetCount fails
    cpEnum->GetCount( &vCount );
    size_t nOut = auOut;
    if( nOut >= vCount )
    {
        cout << "Not so many audio outputs available! Try again\n" << endl;
        // Stop here instead of using the invalid index, and release the
        // enumerator so the next call starts from an empty cpEnum.
        cpEnum.Release();
        return 0;
    }
    cout << "Success" << endl;

    ULONG audioOut = static_cast<ULONG>( nOut ); //convert nOut to ULONG audioOut
    if( SUCCEEDED( cpEnum->Item( audioOut, &cpAudioOutToken ) ) )
    {
        if( SUCCEEDED( SpGetDescription( cpAudioOutToken, &dynStr ) ) )
        {
            printf( "You chose %ls\n\n", dynStr );
        }
        //Initialization of the Audio Output
        if( SUCCEEDED( cpVoice->SetOutput( cpAudioOutToken, TRUE ) ) )
        {
            saveAudio = audioOut; //define saveAudio to audioOut value
        }
        else
        {
            printf( "Could not set audio output\n" );
        }
    }
    else
    {
        printf( "Could not set audio output\n" );
    }

    dynStr.Clear();
    cpEnum.Release();
    cpAudioOutToken.Release();
    return true;
}
When I start my program and call the getAudioOut
function, I get the following listing:
The first line shows the default audio output, and the two below are the available outputs. On Windows 7, when I set the second audio output (Lautsprecher / Kopfhörer) as default, then no sound comes out from the first (Digitalaudio), which makes sense. However, on Windows 10 I have reproduced the same procedure but it doesn't work. The audio output is always defined according to the audio menu.
My question is: has anyone experienced this issue? Is there an alternative way to set the audio output programmatically?
I edited the code as @NikolayShmyrev suggested, but it didn't change anything. However, I continued to dig into the problem and found out that the issue came from another function. Indeed, when I switched from Windows 7 to Windows 10, I encountered other problems with the speech synthesis function and the speech-to-WAV-file function. When I started the program and called the Text-To-Speech
function, everything worked great. When I called the Speech2Wav
function, it worked too. However, when I called the Text-To-Speech
function again, the variable HRESULT hr = S_OK;
changed its value and no sound was played. The value of hr
was set to -2147200968 which corresponds to Error 0x80045038: SPERR_STREAM_CLOSED (source/list of error codes)
To solve this issue I had to define an audio output like this cpVoice->SetOutput( cpAudioOutToken, TRUE );
in the Text-To-Speech
function.
This brings us back to the problem I stated above. When I set the audio output in the function setAudioOut
, I release its value at the end cpAudioOutToken.Release();
However, I reuse the same variable in the Text-To-Speech
function. By then it was empty, because I had released it when I set the audio output. This is why the audio output always fell back to the default. To solve the problem, I assigned the value of cpAudioOutToken
to another variable called cpSpeechOutToken
.
Here is the code for the function
setAudioOut
/**
 * Selects the audio output with index auOut (position in the SAPI
 * SPCAT_AUDIOOUT enumeration) as the output of cpVoice and keeps a reference
 * to its token in cpSpeechOutToken.
 *
 * Works on state declared outside this block (hr, cpEnum, cpAudioOutToken,
 * cpSpeechOutToken, cpVoice, dynStr, vCount, saveAudio). hr is overwritten
 * with the result of SpEnumTokens. cpSpeechOutToken and saveAudio are updated
 * only when the output was actually set.
 *
 * @param auOut  Zero-based index of the desired audio output. Negative values
 *               convert to a huge unsigned number and are rejected as out of range.
 * @return 0 when auOut is out of range, otherwise true (non-zero).
 */
int setAudioOut( int auOut ) //define audio output function
{
    // hr holds the outcome of an earlier call made elsewhere; do nothing if it failed.
    if( !SUCCEEDED( hr ) )
    {
        printf( "Could not set audio output\n" );
        return true;
    }

    hr = SpEnumTokens( SPCAT_AUDIOOUT, NULL, NULL, &cpEnum );
    if( !SUCCEEDED( hr ) )
    {
        // cpEnum was not filled in, so it must not be dereferenced.
        printf( "Could not set audio output\n" );
        return true;
    }

    vCount = 0; // stays 0 (everything out of range) if GetCount fails
    cpEnum->GetCount( &vCount );
    size_t nOut = auOut;
    if( nOut >= vCount )
    {
        cout << "Not so many audio outputs available! Try again\n" << endl;
        // Release the enumerator before leaving early; otherwise it stays
        // referenced and the next &cpEnum would start from a non-empty pointer.
        cpEnum.Release();
        return 0;
    }
    cout << "Success" << endl;

    ULONG audioOut = static_cast<ULONG>( nOut ); //convert nOut to ULONG audioOut
    if( SUCCEEDED( cpEnum->Item( audioOut, &cpAudioOutToken ) ) )
    {
        if( SUCCEEDED( SpGetDescription( cpAudioOutToken, &dynStr ) ) )
        {
            printf( "You chose %ls\n\n", dynStr );
        }
        //Initialization of the Audio Output
        if( SUCCEEDED( cpVoice->SetOutput( cpAudioOutToken, TRUE ) ) )
        {
            // Keep a separate reference to the token: cpAudioOutToken is
            // released below, but the selection is needed again later.
            cpSpeechOutToken = cpAudioOutToken;
            saveAudio = audioOut; //define saveAudio to audioOut value
        }
        else
        {
            printf( "Could not set audio output\n" );
        }
    }
    else
    {
        printf( "Could not set audio output\n" );
    }

    dynStr.Clear();
    cpEnum.Release();
    cpAudioOutToken.Release();
    return true;
}
Here is the code from the
Text-To-Speech
function
/**
 * Speaks the given text (interpreted as SAPI XML) with the voice at index
 * saveVoice, on the audio output referenced by cpSpeechOutToken.
 *
 * Works on state declared outside this block (hr, cpEnum, cpVoice,
 * cpVoiceToken, cpSpeechOutToken, saveVoice, saveText). hr is overwritten with
 * the result of SpEnumTokens and then with the result of Speak.
 *
 * @param text  Null-terminated text in the system ANSI code page (CP_ACP).
 * @return Always true (non-zero), including when nothing could be spoken.
 */
int ttsSpeak( const char* text ) //Text to Speech speaking function
{
    // hr holds the outcome of an earlier call made elsewhere; do nothing if it failed.
    if( !SUCCEEDED( hr ) )
    {
        printf( "Could not speak entered text\n" );
        return true;
    }

    hr = SpEnumTokens( SPCAT_VOICES_WIN10, NULL, NULL, &cpEnum );
    //Replace SPCAT_VOICES_WIN10 with SPCAT_VOICES if you want to use it on Windows 7
    if( !SUCCEEDED( hr ) )
    {
        // cpEnum was not filled in, so it must not be dereferenced.
        printf( "Could not speak entered text\n" );
        return true;
    }

    cpEnum->Item( saveVoice, &cpVoiceToken ); //get the token for the voice index kept in saveVoice
    cpVoice->SetVoice( cpVoiceToken ); //Initialization of the voice

    // First call only measures: the count includes the terminating null.
    const int wchars_num = MultiByteToWideChar( CP_ACP, 0, text, -1, NULL, 0 );
    if( wchars_num > 0 )
    {
        // std::wstring owns the buffer, so it is freed on every path. The
        // original "delete new wchar_t[...]" freed a fresh array and leaked this one.
        std::wstring wstr( static_cast<size_t>( wchars_num ), L'\0' );
        MultiByteToWideChar( CP_ACP, 0, text, -1, &wstr[ 0 ], wchars_num );
        printf( "Text To Speech processing\n" );
        cpVoice->SetOutput( cpSpeechOutToken, TRUE );
        hr = cpVoice->Speak( wstr.c_str(), SVSFIsXML, NULL );
        // NOTE(review): saveText is declared outside this block. Assigning from
        // text rather than a local string's c_str() avoids a dangling pointer
        // if saveText is a raw const char* - confirm its type.
        saveText = text;
    }
    else
    {
        // Conversion failed (e.g. text is null); there is nothing to speak.
        printf( "Could not speak entered text\n" );
    }

    cpEnum.Release();
    cpVoiceToken.Release();
    return true;
}