I've been trying to record a human voice on android using AudioRecord with 32-bit depth and write them into a wav file. I know that recording with 32-bit precision is only available on API level 23 or higher (as per documentation says) using AudioFormat.ENCODING_PCM_FLOAT
. I did some test on devices with API level 23, but still for some reason, the resulting audio is corrupted (all i can hear is a complete noise). Here's what my code looks like:
private AudioRecord mRecorder;
private int mBufferSize;
private Thread mRecordingThread;
private boolean mIsRecording;
private String tempPath = "/some/path/tempFile.wav";
private String outputPath = "/some/path/recording.wav";
@TargetApi(23)
public void startRecording() {
mBufferSize = AudioRecord.getMinBufferSize(16000, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_FLOAT);
mRecorder = new AudioRecord(MediaRecorder.AudioSource.MIC, 16000, AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_FLOAT, mBufferSize);
if (mRecorder.getState() == AudioRecord.STATE_INITIALIZED
&& mBufferSize != AudioRecord.ERROR_BAD_VALUE) {
return;
}
mRecordingThread = new Thread(new Runnable() {
@Override
public void run() {
android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);
try {
FileOutputStream outputStream = new FileOutputStream(tempPath);
float[] audioData = new float[mBufferSize / 4];
mRecorder.startRecording();
mIsRecording = true;
while (mIsRecording) {
int readSize = mRecorder.read(audioData, 0, audioData.length, AudioRecord.READ_BLOCKING);
if (readSize == AudioRecord.ERROR_INVALID_OPERATION || readSize == AudioRecord.ERROR_BAD_VALUE) {
break;
}
// convert float to byte
byte[] bytes = new byte[audioData.length * 4];
ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder()).asFloatBuffer().put(audioData);
try {
outputStream.write(bytes);
} catch (IOException e) {
Log.e("Recorder", e.getMessage(), e);
}
}
outputStream.close();
} catch (IOException e) {
Log.e("Recorder", e.getMessage(), e);
} catch (Exception e) {
Log.e("Recorder", e.getMessage(), e);
}
}
});
mRecordingThread.start();
}
public void stopRecording() {
if (mIsRecording) {
mIsRecording = false;
if (mRecordingThread != null && mRecordingThread.getState() != Thread.State.NEW
&& mRecordingThread.getState() != Thread.State.TERMINATED) {
try {
mRecordingThread.join();
} catch (InterruptedException e) {
// ...
}
}
if (mRecorder.getState() == AudioRecord.RECORDSTATE_RECORDING) {
mRecorder.stop();
}
mRecordingThread = null;
generateFinalAudio();
}
}
public void generateFinalAudio() {
CopyAudioTask task = new CopyAudioTask();
task.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR, tempPath, outputPath);
}
private class CopyAudioTask extends AsyncTask<String, String, Void> {
@Override
protected Void doInBackground(String... params) {
String tmpPath = params[0];
String outPath = params[1];
try {
FileInputStream in = new FileInputStream(tmpPath);
FileOutputStream out = new FileOutputStream(outPath);
byte[] data = new byte[mBufferSize];
writeWaveFileHeader(in, out);
while (in.read(data) != -1) {
out.write(data);
}
in.close();
out.close();
} catch (Exception e) {
Log.e("Recorder", e.getMessage(), e);
}
return null;
}
}
private void writeWaveFileHeader(FileInputStream in, FileOutputStream out) throws IOException {
int bitWidth = 32;
long longSampleRate = mRecorder.getSampleRate();
int channels = mRecorder.getChannelCount();
long totalAudioLen = in.getChannel().size();
long totalDataLen = totalAudioLen + 36;
long byteRate = bitWidth * longSampleRate * channels / 8;
byte[] header = new byte[44];
header[0] = 'R'; // RIFF/WAVE header
header[1] = 'I';
header[2] = 'F';
header[3] = 'F';
header[4] = (byte) (totalDataLen & 0xff);
header[5] = (byte) ((totalDataLen >> 8) & 0xff);
header[6] = (byte) ((totalDataLen >> 16) & 0xff);
header[7] = (byte) ((totalDataLen >> 24) & 0xff);
header[8] = 'W';
header[9] = 'A';
header[10] = 'V';
header[11] = 'E';
header[12] = 'f'; // 'fmt ' chunk
header[13] = 'm';
header[14] = 't';
header[15] = ' ';
header[16] = 16; // 4 bytes: size of 'fmt ' chunk
header[17] = 0;
header[18] = 0;
header[19] = 0;
header[20] = 1; // format = 1
header[21] = 0;
header[22] = (byte) channels;
header[23] = 0;
header[24] = (byte) (longSampleRate & 0xff);
header[25] = (byte) ((longSampleRate >> 8) & 0xff);
header[26] = (byte) ((longSampleRate >> 16) & 0xff);
header[27] = (byte) ((longSampleRate >> 24) & 0xff);
header[28] = (byte) (byteRate & 0xff);
header[29] = (byte) ((byteRate >> 8) & 0xff);
header[30] = (byte) ((byteRate >> 16) & 0xff);
header[31] = (byte) ((byteRate >> 24) & 0xff);
header[32] = (byte) (channels * bitWidth / 8); // block align
header[33] = 0;
header[34] = (byte) bitWidth; // bits per sample
header[35] = 0;
header[36] = 'd';
header[37] = 'a';
header[38] = 't';
header[39] = 'a';
header[40] = (byte) (totalAudioLen & 0xff);
header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
header[43] = (byte) ((totalAudioLen >> 24) & 0xff);
out.write(header, 0, 44);
}
Can anyone figure out what i'm doing wrong here? I've tried using 16-bits (ENCODING_PCM_16BIT
, and used short[]
as data buffer) and it's working perfectly.
Any help would be really appreciated. Thanks!
Ok, i've now figured it out. The problem is with the header. The value of header[20]
should be 3
or 0x0003
for 32-bit float PCM data as pointed out in this page.