Is there any way to count words in an audio file using some kind of (offline) speech recognition? What would be the best approach to implement something like this? Also, is there any way to make it work continuously, without having to restart it every time someone pauses while talking?
Here is an answer to my question, for those who might need it:
/**
 * Activity that listens to the microphone through the platform
 * {@link SpeechRecognizer}, shows the most recent transcription and keeps a
 * running total of the recognised words.
 *
 * <p>A recognition session ends whenever the speaker pauses, so the activity
 * starts a new session from {@link #onResults(Bundle)} and
 * {@link #onError(int)} to keep listening without user interaction.
 */
public class MainActivity extends AppCompatActivity implements
        RecognitionListener {
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
    private static final String LOG_TAG = "VoiceRecognitionActivity";

    private TextView returnedText;
    private TextView returnedError;
    private ProgressBar progressBar;
    /** Displays the running word total ({@link #ukupanbroj}). */
    private TextView brojtextview;
    /** Current recognizer, or {@code null} when none is alive (stopped or unavailable). */
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    /** Total number of words recognised so far. */
    private int ukupanbroj;

    /**
     * Destroys the current recognizer (if any) and creates a fresh one with this
     * activity as its listener. When speech recognition is not available the
     * activity is finished and {@link #speech} is left {@code null}.
     */
    private void resetSpeechRecognizer() {
        if (speech != null) {
            speech.destroy();
            speech = null;
        }
        boolean available = SpeechRecognizer.isRecognitionAvailable(this);
        Log.i(LOG_TAG, "isRecognitionAvailable: " + available);
        if (!available) {
            finish();
            return;
        }
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        speech.setRecognitionListener(this);
    }

    /** Builds the intent that configures every recognition session. */
    private void setRecogniserIntent() {
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        // Uncomment to ask for on-device (offline) recognition:
        // recognizerIntent.putExtra(RecognizerIntent.EXTRA_PREFER_OFFLINE,true);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
                "sr-RS");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
    }

    /** @return {@code true} when the RECORD_AUDIO permission is currently granted. */
    private boolean hasRecordAudioPermission() {
        return ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                == PackageManager.PERMISSION_GRANTED;
    }

    /**
     * Starts a recognition session, but only when a recognizer exists, the
     * intent has been built and the microphone permission is granted. Every
     * (re)start goes through here so no caller can hit a null recognizer or a
     * null intent.
     */
    private void startListeningIfReady() {
        if (speech == null || recognizerIntent == null || !hasRecordAudioPermission()) {
            return;
        }
        speech.startListening(recognizerIntent);
    }

    /**
     * Counts whitespace-separated words.
     *
     * @param text recognised text, may be {@code null}
     * @return number of words; 0 for {@code null} or blank text
     */
    private static int countWords(String text) {
        if (text == null) {
            return 0;
        }
        String trimmed = text.trim();
        return trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // UI initialisation
        returnedText = findViewById(R.id.textView1);
        returnedError = findViewById(R.id.errorView1);
        progressBar = findViewById(R.id.progressBar1);
        brojtextview = findViewById(R.id.brojtextview);
        progressBar.setVisibility(View.INVISIBLE);
        // start speech recogniser
        resetSpeechRecognizer();
        // start progress bar
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setIndeterminate(true);
        // Build the intent before the permission check: the permission callback
        // and onResume both need it even when we return early below.
        setRecogniserIntent();
        // check for permission
        if (!hasRecordAudioPermission()) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    PERMISSIONS_REQUEST_RECORD_AUDIO);
            return;
        }
        startListeningIfReady();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode,
            @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
            return;
        }
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            startListeningIfReady();
        } else {
            Toast.makeText(MainActivity.this, "Permission Denied!", Toast
                    .LENGTH_SHORT).show();
            finish();
        }
    }

    @Override
    public void onResume() {
        Log.i(LOG_TAG, "resume");
        super.onResume();
        resetSpeechRecognizer();
        startListeningIfReady();
    }

    @Override
    protected void onPause() {
        Log.i(LOG_TAG, "pause");
        super.onPause();
        if (speech != null) {
            speech.stopListening();
        }
    }

    @Override
    protected void onStop() {
        Log.i(LOG_TAG, "stop");
        super.onStop();
        if (speech != null) {
            speech.destroy();
            // Drop the reference so a destroyed recognizer is never reused.
            speech = null;
        }
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.i(LOG_TAG, "onBeginningOfSpeech");
        progressBar.setIndeterminate(false);
        progressBar.setMax(10);
    }

    @Override
    public void onBufferReceived(byte[] buffer) {
        // Log the size; concatenating the array itself only prints its identity.
        Log.i(LOG_TAG, "onBufferReceived: " + (buffer == null ? 0 : buffer.length) + " bytes");
    }

    @Override
    public void onEndOfSpeech() {
        Log.i(LOG_TAG, "onEndOfSpeech");
        progressBar.setIndeterminate(true);
        if (speech != null) {
            speech.stopListening();
        }
    }

    /**
     * Shows the best match, adds its word count to the running total and
     * starts the next recognition session.
     */
    @Override
    public void onResults(Bundle results) {
        Log.i(LOG_TAG, "onResults");
        ArrayList<String> matches = results == null
                ? null
                : results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        if (matches != null && !matches.isEmpty()) {
            String text = matches.get(0);
            returnedText.setText(text);
            ukupanbroj += countWords(text);
            brojtextview.setText(String.valueOf(ukupanbroj));
        }
        // Keep listening even when this session produced no usable text.
        startListeningIfReady();
    }

    /**
     * Displays the error and restarts recognition with a fresh recognizer,
     * except for permission errors, where restarting would only fail again.
     */
    @Override
    public void onError(int errorCode) {
        String errorMessage = getErrorText(errorCode);
        Log.i(LOG_TAG, "FAILED " + errorMessage);
        returnedError.setText(errorMessage);
        if (errorCode == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
            // Restarting cannot succeed without the permission; avoid an endless loop.
            return;
        }
        // reset voice recogniser
        resetSpeechRecognizer();
        startListeningIfReady();
    }

    @Override
    public void onEvent(int arg0, Bundle arg1) {
        Log.i(LOG_TAG, "onEvent");
    }

    @Override
    public void onPartialResults(Bundle arg0) {
        Log.i(LOG_TAG, "onPartialResults");
    }

    @Override
    public void onReadyForSpeech(Bundle arg0) {
        Log.i(LOG_TAG, "onReadyForSpeech");
    }

    @Override
    public void onRmsChanged(float rmsdB) {
        //Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
        progressBar.setProgress((int) rmsdB);
    }

    /**
     * Maps a {@link SpeechRecognizer} error code to a human-readable message.
     *
     * @param errorCode code received in {@link #onError(int)}
     * @return message for the code, or a generic message for unknown codes
     */
    public String getErrorText(int errorCode) {
        String message;
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                message = "Audio recording error";
                break;
            case SpeechRecognizer.ERROR_CLIENT:
                message = "Client side error";
                break;
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                message = "Insufficient permissions";
                break;
            case SpeechRecognizer.ERROR_NETWORK:
                message = "Network error";
                break;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                message = "Network timeout";
                break;
            case SpeechRecognizer.ERROR_NO_MATCH:
                message = "No match";
                break;
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                message = "RecognitionService busy";
                break;
            case SpeechRecognizer.ERROR_SERVER:
                message = "error from server";
                break;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                message = "No speech input";
                break;
            default:
                message = "Didn't understand, please try again.";
                break;
        }
        return message;
    }
}