java android speech-recognition speech-to-text

An App that counts the number of spoken words by using Speech Recognition


Is there any way to count the words in an audio file using some kind of (offline) speech recognition? What would be the best approach to implement something like this? Also, is there any way to make it work continuously, without having to restart it every time someone pauses while talking?


Solution

  • Here is an answer to my own question, for those who might need it:

    /**
     * Activity that listens to the microphone continuously and keeps a running
     * total of the words the speech recogniser reports.
     *
     * <p>A recognition session ends whenever the speaker pauses, so the activity
     * starts a new session from {@link #onResults(Bundle)} and
     * {@link #onError(int)}. Sessions are only started while the activity is
     * resumed and the {@code RECORD_AUDIO} permission is granted.
     */
    public class MainActivity extends AppCompatActivity implements
            RecognitionListener {

        private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
        private static final String LOG_TAG = "VoiceRecognitionActivity";

        /** Shows the most recently recognised phrase. */
        private TextView returnedText;
        /** Shows the most recent recogniser error message. */
        private TextView returnedError;
        /** Indeterminate while waiting, otherwise driven by {@link #onRmsChanged(float)}. */
        private ProgressBar progressBar;
        /** Shows the running word total. */
        private TextView brojtextview;
        /** Current recogniser, or {@code null} when none is alive. */
        private SpeechRecognizer speech = null;
        private Intent recognizerIntent;
        /** Running total of recognised words. */
        private int ukupanbroj;
        /** True between {@link #onResume()} and {@link #onPause()}. */
        private boolean resumed;

        /**
         * Destroys any existing recogniser and creates a fresh one with this
         * activity as its listener. If recognition is not available on the
         * device, the activity is finished and {@code speech} is left {@code null}.
         */
        private void resetSpeechRecognizer() {
            releaseSpeechRecognizer();

            boolean available = SpeechRecognizer.isRecognitionAvailable(this);
            Log.i(LOG_TAG, "isRecognitionAvailable: " + available);
            if (!available) {
                finish();
                return;
            }
            speech = SpeechRecognizer.createSpeechRecognizer(this);
            speech.setRecognitionListener(this);
        }

        /** Destroys the current recogniser, if any, and clears the reference to it. */
        private void releaseSpeechRecognizer() {
            if (speech != null) {
                speech.destroy();
                speech = null;
            }
        }

        /** Builds the intent that configures every recognition session. */
        private void setRecogniserIntent() {
            recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
            // To ask for on-device recognition, enable the line below
            // (NOTE(review): confirm the minimum API level for EXTRA_PREFER_OFFLINE).
            // recognizerIntent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE, true);
            recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "sr-RS");
            recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                    RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
            recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
        }

        /** Returns whether the {@code RECORD_AUDIO} permission is currently granted. */
        private boolean hasRecordAudioPermission() {
            return ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                    == PackageManager.PERMISSION_GRANTED;
        }

        /**
         * Starts a recognition session if the activity is resumed and the
         * microphone permission has been granted; otherwise does nothing.
         *
         * @param recreateRecognizer when {@code true}, the recogniser is destroyed
         *                           and rebuilt before listening starts
         */
        private void startListeningIfPossible(boolean recreateRecognizer) {
            if (!resumed || !hasRecordAudioPermission()) {
                return;
            }
            if (recreateRecognizer || speech == null) {
                resetSpeechRecognizer();
            }
            if (speech == null) {
                // Recognition is unavailable; resetSpeechRecognizer() already called finish().
                return;
            }
            if (recognizerIntent == null) {
                setRecogniserIntent();
            }
            speech.startListening(recognizerIntent);
        }

        /**
         * Counts whitespace-separated words.
         *
         * @param text recognised text, may be {@code null}
         * @return the number of words, or 0 for {@code null} or blank text
         */
        private static int countWords(String text) {
            if (text == null) {
                return 0;
            }
            String trimmed = text.trim();
            return trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
        }

        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            setContentView(R.layout.activity_main);

            // UI initialisation
            returnedText = findViewById(R.id.textView1);
            returnedError = findViewById(R.id.errorView1);
            progressBar = findViewById(R.id.progressBar1);
            brojtextview = findViewById(R.id.brojtextview);
            progressBar.setVisibility(View.VISIBLE);
            progressBar.setIndeterminate(true);

            // Build the intent up front so it is never null when a session starts,
            // even if the permission is only granted later.
            setRecogniserIntent();

            if (!hasRecordAudioPermission()) {
                ActivityCompat.requestPermissions(this,
                        new String[]{Manifest.permission.RECORD_AUDIO},
                        PERMISSIONS_REQUEST_RECORD_AUDIO);
            }
            // Listening itself is started from onResume(), which follows onCreate().
        }

        @Override
        public void onRequestPermissionsResult(int requestCode,
                                               @NonNull String[] permissions, @NonNull  int[] grantResults) {
            super.onRequestPermissionsResult(requestCode, permissions, grantResults);

            if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
                return;
            }
            if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                // No-op if the activity is not resumed yet; onResume() starts listening then.
                startListeningIfPossible(false);
            } else {
                Toast.makeText(MainActivity.this, "Permission Denied!", Toast
                        .LENGTH_SHORT).show();
                finish();
            }
        }

        @Override
        public void onResume() {
            Log.i(LOG_TAG, "resume");
            super.onResume();
            resumed = true;
            startListeningIfPossible(true);
        }

        @Override
        protected void onPause() {
            Log.i(LOG_TAG, "pause");
            super.onPause();
            // Clear the flag first so late callbacks do not restart listening.
            resumed = false;
            if (speech != null) {
                speech.stopListening();
            }
        }

        @Override
        protected void onStop() {
            Log.i(LOG_TAG, "stop");
            super.onStop();
            releaseSpeechRecognizer();
        }

        @Override
        public void onBeginningOfSpeech() {
            Log.i(LOG_TAG, "onBeginningOfSpeech");
            progressBar.setIndeterminate(false);
            progressBar.setMax(10);
        }

        @Override
        public void onBufferReceived(byte[] buffer) {
            // Log the size: concatenating the array itself would only print its identity.
            Log.i(LOG_TAG, "onBufferReceived: " + (buffer == null ? 0 : buffer.length));
        }

        @Override
        public void onEndOfSpeech() {
            Log.i(LOG_TAG, "onEndOfSpeech");
            progressBar.setIndeterminate(true);
            if (speech != null) {
                speech.stopListening();
            }
        }

        /**
         * Displays the best match, adds its word count to the running total and
         * starts the next recognition session.
         */
        @Override
        public void onResults(Bundle results) {
            Log.i(LOG_TAG, "onResults");
            ArrayList<String> matches = results == null
                    ? null
                    : results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);

            // The result list may be missing or empty; only count when there is a match.
            if (matches != null && !matches.isEmpty()) {
                String text = matches.get(0);
                returnedText.setText(text);

                ukupanbroj += countWords(text);
                brojtextview.setText("" + ukupanbroj);
            }

            startListeningIfPossible(false);
        }

        /**
         * Shows the error and, unless it is a permission failure, rebuilds the
         * recogniser and starts listening again.
         */
        @Override
        public void onError(int errorCode) {
            String errorMessage = getErrorText(errorCode);
            Log.i(LOG_TAG, "FAILED " + errorMessage);
            returnedError.setText(errorMessage);

            if (errorCode == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
                // Restarting would fail the same way and loop forever.
                return;
            }

            // reset voice recogniser
            startListeningIfPossible(true);
        }

        @Override
        public void onEvent(int arg0, Bundle arg1) {
            Log.i(LOG_TAG, "onEvent");
        }

        @Override
        public void onPartialResults(Bundle arg0) {
            Log.i(LOG_TAG, "onPartialResults");
        }

        @Override
        public void onReadyForSpeech(Bundle arg0) {
            Log.i(LOG_TAG, "onReadyForSpeech");
        }

        @Override
        public void onRmsChanged(float rmsdB) {
            progressBar.setProgress((int) rmsdB);
        }

        /**
         * Maps a {@link SpeechRecognizer} error code to a short message.
         *
         * @param errorCode one of the {@code SpeechRecognizer.ERROR_*} constants
         * @return the message for that code, or a generic message for unknown codes
         */
        public String getErrorText(int errorCode) {
            String message;
            switch (errorCode) {
                case SpeechRecognizer.ERROR_AUDIO:
                    message = "Audio recording error";
                    break;
                case SpeechRecognizer.ERROR_CLIENT:
                    message = "Client side error";
                    break;
                case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                    message = "Insufficient permissions";
                    break;
                case SpeechRecognizer.ERROR_NETWORK:
                    message = "Network error";
                    break;
                case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                    message = "Network timeout";
                    break;
                case SpeechRecognizer.ERROR_NO_MATCH:
                    message = "No match";
                    break;
                case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                    message = "RecognitionService busy";
                    break;
                case SpeechRecognizer.ERROR_SERVER:
                    message = "error from server";
                    break;
                case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                    message = "No speech input";
                    break;
                default:
                    message = "Didn't understand, please try again.";
                    break;
            }
            return message;
        }
    }