c++ winapi text-to-speech sapi

How can I change this code so that the text-to-speech output is spoken in English instead of Japanese?


At the moment I have code for text-to-speech, but the output is spoken by a Japanese voice. I want to change the code so that the output is spoken in English instead. Is anyone familiar with this who can help me?

// Initialize SAPI: create the voice object through COM and store it in pVoice.
if (FAILED(CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice))) 
{
    // NOTE(review): a bare `throw;` rethrows the exception currently being
    // handled; if no exception is active it calls std::terminate. Confirm
    // that this is the intended failure path.
    throw;
}


// Register events to detect: phoneme events and the end of the input stream.
// The same mask is passed both as the events of interest and as the events to queue.
pVoice->SetInterest(SPFEI(SPEI_PHONEME) | SPFEI(SPEI_END_INPUT_STREAM),
    SPFEI(SPEI_PHONEME) | SPFEI(SPEI_END_INPUT_STREAM));
    pVoice->SetNotifyWindowMessage(hWnd, WM_USER, 0, 0);    // subscribe: notifications arrive at hWnd as WM_USER

// Preparation before speaking: a NULL text with SPF_PURGEBEFORESPEAK
// purges any pending speak requests.
pVoice->Speak( NULL , SPF_PURGEBEFORESPEAK , 0 );   

}

Solution

  • XML mode

    // SPF_IS_XML makes SAPI parse the text as XML. The <voice required=...>
    // tag asks for a voice whose Language attribute is 409 (US English)
    // for the enclosed text.
    hr = voice->Speak(
            L"<sapi><voice required=\"Language=409\">Hello World</voice></sapi>",
            SPF_IS_XML,
            0 );
    

    or

    // SPF_IS_XML makes SAPI parse the text as XML. The <lang langid=...>
    // tag marks the enclosed text as language 409 (US English).
    hr = voice->Speak(
            L"<sapi><lang langid=\"409\">Hello World</lang></sapi>",
            SPF_IS_XML,
            0 );
    

    Handcoded mode

    /// Selects the first installed SAPI voice ("narrator") that matches the
    /// requested attributes and passes a short sanity test.
    ///
    /// @param voice     Voice object to configure; a null pointer yields false.
    /// @param language  Required-attribute string handed to SpEnumTokens,
    ///                  e.g. L"Language=409" for US English.
    /// @return true when a matching voice was set on `voice` and the sanity
    ///         test succeeded; false when enumeration failed or no matching
    ///         voice works.
    bool SetFirstFitNarrator( ISpVoice* voice, const WCHAR* language )
    {
        if ( voice == NULL )
            return false;

        CComPtr<IEnumSpObjectTokens> cpEnum;
        if ( SpEnumTokens( SPCAT_VOICES, language, 0, &cpEnum ) != S_OK )
            return false;

        for ( ;; )
        {
            // Declared per iteration so the reference obtained from Next()
            // is released automatically before the next token is fetched
            // (a raw ISpObjectToken* here would leak every token).
            CComPtr<ISpObjectToken> token;
            if ( cpEnum->Next( 1, &token, NULL ) != S_OK )
                break;

            // We have a narrator handling this language, try it.
            if ( voice->SetVoice( token ) != S_OK )
                continue;

            // Some sanity tests, in most cases not necessary.
            // They are used to detect broken narrators.
            // You can just
            //    return true;
            // instead.
            if ( voice->Speak( 0, SPF_PURGEBEFORESPEAK, 0 ) != S_OK )
                continue;

            if ( voice->Speak( L" ", SPF_ASYNC | SPF_IS_NOT_XML, 0 ) == S_OK )
            {
                // Narrator seems to work OK.
                return true;
            }

            // Alternatively you can collect narrators in some table
            // and offer them to the user for selection.
            //
            // This is how you can obtain a user friendly description, e.g.
            // "Microsoft David Desktop - English (United States)"
            //
            //  CSpDynamicString dstrDesc;
            //  HRESULT hr = SpGetDescription( token, &dstrDesc );
            //  if ( hr == S_OK )
            //  {
            //      const WCHAR*    user_friendly_description = dstrDesc;
            //  }
        }

        // None of the narrators can handle the given language.
        return false;
    }
    
    /// Demo: pick a US English narrator and speak a greeting, or tell the
    /// user that no such narrator is available.
    void Test()
    {
        // "Language=409" selects US English.
        const bool narratorReady = SetFirstFitNarrator( voice, L"Language=409" );

        if ( !narratorReady )
        {
            // No usable narrator: report it and stop.
            MessageBox(
                    hwnd,
                    L"Unfortunately there is no narrator handling US English language.",
                    L"No narrator",
                    MB_OK | MB_ICONEXCLAMATION );
            return;
        }

        voice->Speak( L"hello world ", SPF_IS_NOT_XML, 0 );
    }
    

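    For the list of language IDs, download the PDF of [MS-LCID]: Windows Language Code Identifier (LCID) Reference. Note that SAPI expects the LCID in hexadecimal without the 0x prefix (for example, 409 is 0x0409, US English).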