Tags: flutter, dart, speech-recognition, speech-to-text

Flutter/Dart: speech to text (offline and continuous) for any language


Is there any package that I can use to create an app that can process speech to text?

It should include the following features (as in the title): offline recognition, continuous listening, and support for any language.

So far I found this https://pub.dev/packages/speech_recognition but it says:

The iOS API sends intermediate results; on my Android device, only the final transcription is received.

Other limitations: On iOS, by default the plugin is configured for French, English, Russian, Spanish, Italian. On Android, without additional installations, it will probably work only with the default device locale.

Has anybody tested this package and had good results? Or do you have any other suggestions?


Solution

  • I'm using https://pub.dev/packages/speech_to_text now. It is actively maintained and works pretty well. I think some custom code can be written to make it listen continuously.

    Edit:

    As requested, please see the continuous listening logic below. I only used it as a proof of concept, so I wouldn't recommend it for production apps. As far as I know the Android API does not support continuous listening out of the box.

    SpeechRecognitionBloc

    import 'dart:async';

    import 'package:bloc/bloc.dart';
    import 'package:meta/meta.dart';
    import 'package:template_mobile/core/sevices/speech_recognition_service.dart';
    import 'package:template_mobile/core/state/event/speech_recognition_event.dart';
    import 'package:template_mobile/core/state/state/speech_recognition_state.dart';
    
    /// Bloc that drives a [SpeechRecognitionService] and emulates continuous
    /// listening.
    ///
    /// Errors, status changes and recognition results published by the service
    /// are turned into events. After a final result or an error the bloc queues
    /// a new start event, so recognition is restarted again and again.
    class SpeechRecognitionBloc
        extends Bloc<SpeechRecognitionEvent, SpeechRecognitionState> {
      final SpeechRecognitionService speechRecognitionService;

      /// Subscriptions to the service streams; cancelled in [close] so the
      /// callbacks stop adding events once this bloc is shut down.
      final _subscriptions = <StreamSubscription<dynamic>>[];

      SpeechRecognitionBloc({
        @required this.speechRecognitionService,
      }) : assert(speechRecognitionService != null) {
        _subscriptions.add(
          speechRecognitionService.errors.stream.listen((errorResult) {
            add(SpeechRecognitionErrorEvent(
              error: "${errorResult.errorMsg} - ${errorResult.permanent}",
            ));
          }),
        );

        _subscriptions.add(
          speechRecognitionService.statuses.stream.listen((status) {
            // Only react to a status change once a final result has been
            // emitted; the status value itself is not inspected here.
            final currentState = state;
            if (currentState is SpeechRecognitionRecognizedState &&
                currentState.finalResult) {
              add(SpeechRecognitionStatusChangedEvent());
            }
          }),
        );

        _subscriptions.add(
          speechRecognitionService.words.stream.listen((speechResult) {
            add(SpeechRecognitionRecognizedEvent(
              words: speechResult.recognizedWords,
              finalResult: speechResult.finalResult,
            ));
          }),
        );
      }

      @override
      SpeechRecognitionState get initialState =>
          SpeechRecognitionUninitializedState();

      /// Cancels the service stream subscriptions before closing the bloc.
      @override
      Future<void> close() async {
        for (final subscription in _subscriptions) {
          await subscription.cancel();
        }
        _subscriptions.clear();
        return super.close();
      }

      /// Maps each incoming [event] to the states it produces.
      ///
      /// The "press" events only emit a UI state and then queue the event that
      /// performs the actual service call.
      @override
      Stream<SpeechRecognitionState> mapEventToState(
          SpeechRecognitionEvent event) async* {
        if (event is SpeechRecognitionInitEvent) {
          final hasSpeech = await speechRecognitionService.initSpeech();
          if (hasSpeech) {
            yield SpeechRecognitionAvailableState();
          } else {
            yield SpeechRecognitionUnavailableState();
          }
        }

        if (event is SpeechRecognitionStartPressEvent) {
          yield SpeechRecognitionStartPressedState();
          add(SpeechRecognitionStartEvent());
        }

        if (event is SpeechRecognitionStartEvent) {
          speechRecognitionService.startListening();
          yield SpeechRecognitionStartedState();
        }

        if (event is SpeechRecognitionStopPressEvent) {
          yield SpeechRecognitionStopPressedState();
          add(SpeechRecognitionStopEvent());
        }

        if (event is SpeechRecognitionStopEvent) {
          speechRecognitionService.stopListening();
          yield SpeechRecognitionStopedState();
        }

        if (event is SpeechRecognitionCancelEvent) {
          speechRecognitionService.cancelListening();
          yield SpeechRecognitionCanceledState();
        }

        if (event is SpeechRecognitionRecognizedEvent) {
          yield SpeechRecognitionRecognizedState(
              words: event.words, finalResult: event.finalResult);
          // A final result while the recognizer already reports
          // 'notListening' means no further status change will trigger the
          // restart, so queue it from here.
          if (event.finalResult == true &&
              speechRecognitionService.statuses.value == 'notListening') {
            await Future.delayed(Duration(milliseconds: 50));
            add(SpeechRecognitionStatusChangedEvent());
          }
        }

        if (event is SpeechRecognitionErrorEvent) {
          yield SpeechRecognitionErrorState(error: event.error);
          // The short delays only give the UI time to observe each state
          // before the next event is queued.
          await Future.delayed(Duration(milliseconds: 50));
          add(SpeechRecognitionInitEvent());
          await Future.delayed(Duration(milliseconds: 50));
          add(SpeechRecognitionStartPressEvent());
        }

        if (event is SpeechRecognitionStatusChangedEvent) {
          yield SpeechRecognitionStatusState();
          // Restart listening to keep recognition going.
          add(SpeechRecognitionStartPressEvent());
        }
      }
    }
    

    SpeechRecognitionService

    import 'dart:async';
    
    import 'package:rxdart/rxdart.dart';
    import 'package:speech_to_text/speech_recognition_error.dart';
    import 'package:speech_to_text/speech_recognition_result.dart';
    import 'package:speech_to_text/speech_to_text.dart';
    
    /// Wraps a [SpeechToText] instance and republishes its callbacks as streams.
    ///
    /// Errors are pushed to [errors], status strings to [statuses] and
    /// recognition results to [words]. Call [dispose] when the service is no
    /// longer needed.
    class SpeechRecognitionService {
      final SpeechToText speech = SpeechToText();

      /// Errors reported through the `onError` callback of [speech].
      var errors = StreamController<SpeechRecognitionError>();

      /// Status strings reported through the `onStatus` callback of [speech];
      /// the latest one stays readable via `statuses.value`.
      var statuses = BehaviorSubject<String>();

      /// Results reported through the `onResult` callback of [speech].
      var words = StreamController<SpeechRecognitionResult>();

      /// Locale passed to [SpeechToText.listen]; set to the system locale by
      /// [initSpeech] when initialization succeeds.
      var _localeId = '';

      /// Initializes the recognizer and registers the error/status listeners.
      ///
      /// Returns whether speech recognition is available.
      Future<bool> initSpeech() async {
        final hasSpeech = await speech.initialize(
          onError: errorListener,
          onStatus: statusListener,
        );

        if (hasSpeech) {
          final systemLocale = await speech.systemLocale();
          _localeId = systemLocale.localeId;
        }

        return hasSpeech;
      }

      /// Stops any running session and starts a new one-minute listen session
      /// with partial results enabled.
      ///
      /// The stop is awaited first so the new session is not requested while
      /// the previous one is still shutting down.
      Future<void> startListening() async {
        await speech.stop();
        await speech.listen(
            onResult: resultListener,
            listenFor: Duration(minutes: 1),
            localeId: _localeId,
            cancelOnError: true,
            partialResults: true);
      }

      /// Forwards a recognizer [error] to [errors].
      void errorListener(SpeechRecognitionError error) {
        // Callbacks may still arrive after dispose(); adding to a closed
        // controller would throw.
        if (!errors.isClosed) {
          errors.add(error);
        }
      }

      /// Forwards a recognizer [status] to [statuses].
      void statusListener(String status) {
        if (!statuses.isClosed) {
          statuses.add(status);
        }
      }

      /// Forwards a recognition [result] to [words].
      void resultListener(SpeechRecognitionResult result) {
        if (!words.isClosed) {
          words.add(result);
        }
      }

      /// Stops the current listen session.
      Future<void> stopListening() => speech.stop();

      /// Cancels the current listen session.
      Future<void> cancelListening() => speech.cancel();

      /// Cancels recognition and closes all streams exposed by this service.
      ///
      /// The close futures are intentionally not awaited: a single-subscription
      /// controller's close future only completes once its done event is
      /// delivered, which never happens if nobody is listening.
      void dispose() {
        speech.cancel();
        errors.close();
        statuses.close();
        words.close();
      }
    }