I am writing a class for an app which supports streaming and recording video. In short, when the phone is streaming and recording, audio is saved in a PCM file, and video is saved in an mp4 file, using a MediaRecorder. My goal is, when the recording completes, to use a MediaMuxer and combine both inputs to a new, combined .mp4 file.
I've tried using a MediaMuxer, encoding the audio with a MediaCodec and extracting the video with a MediaExtractor. Both the original video and audio files are intact, and the output file contains proper audio, yet the video seems corrupted, as if frames are being skipped.
This is the code that I am currently using:
/**
 * Encodes a raw PCM audio stream to AAC and muxes it, together with the video
 * track copied from an existing media file, into a single MPEG-4 output file.
 *
 * <p>Call order: {@link #setOutputPath(String)} (required) and
 * {@link #setVideoPath(String)} (optional), then {@link #prepare()},
 * {@link #start(InputStream)} and finally {@link #stop()}.
 *
 * <p>The PCM input is assumed to be 16-bit, interleaved, with the sample rate
 * and channel count given to the constructor (TODO confirm against the
 * component that writes the PCM file).
 *
 * <p>Instances are not designed for concurrent use.
 */
public class StreamRecordingMuxer {
    private static final String TAG = StreamRecordingMuxer.class.getSimpleName();
    private static final String COMPRESSED_AUDIO_FILE_MIME_TYPE = "audio/mp4a-latm";
    private static final String VIDEO_MIME_PREFIX = "video/";
    /** Timeout, in microseconds, for every codec dequeue call. */
    private static final int CODEC_TIMEOUT = 5000;
    /** Size of one PCM sample of one channel (16-bit PCM assumed). */
    private static final int BYTES_PER_SAMPLE = 2;
    private static final long MICROS_PER_SECOND = 1000000L;
    /** Sample buffer size used when the video format does not advertise a maximum input size. */
    private static final int DEFAULT_FRAME_MAX_INPUT_SIZE = 1024 * 1024;
    /** How many times the end-of-stream marker is retried when no input buffer is free. */
    private static final int EOS_QUEUE_MAX_ATTEMPTS = 10;
    /** How many consecutive empty polls are tolerated while waiting for the encoder's final output. */
    private static final int EOS_DRAIN_MAX_IDLE_POLLS = 200;

    private final int bitrate;
    private final int sampleRate;
    private final int channelCount;

    // Audio state
    private MediaFormat audioFormat;
    private MediaCodec mediaCodec;
    private MediaMuxer mediaMuxer;
    private ByteBuffer[] codecInputBuffers;
    private ByteBuffer[] codecOutputBuffers;
    private MediaCodec.BufferInfo audioBufferInfo;
    private String outputPath;
    private int audioTrackId;
    private long totalBytesRead;
    private long presentationTimeUs;
    /** True once {@code mediaMuxer.start()} has been called; samples may only be written afterwards. */
    private boolean muxerStarted;

    // Video state
    private int videoTrackId;
    private MediaExtractor videoExtractor;
    private MediaFormat videoFormat;
    private String videoPath;
    private int videoTrackIndex;
    private int frameMaxInputSize;
    private int rotationDegrees;

    /**
     * @param bitrate      target AAC bit rate, in bits per second
     * @param sampleRate   PCM sample rate, in Hz
     * @param channelCount number of interleaved PCM channels
     */
    public StreamRecordingMuxer(final int bitrate, final int sampleRate, int channelCount) {
        this.bitrate = bitrate;
        this.sampleRate = sampleRate;
        this.channelCount = channelCount;
    }

    /** Sets the path of the combined output file; must be called before {@link #prepare()}. */
    public void setOutputPath(final String outputPath) {
        this.outputPath = outputPath;
    }

    /** Sets the path of the file whose video track is copied; leave unset for audio-only output. */
    public void setVideoPath(String videoPath) {
        this.videoPath = videoPath;
    }

    /**
     * Creates and starts the AAC encoder, opens the video source (if any) and
     * creates the muxer. The muxer itself is started later, once the encoder
     * has reported its output format.
     *
     * <p>An {@link IOException} during set-up is logged and every partially
     * created resource is released; {@link #start(InputStream)} then fails
     * with an {@link IllegalStateException}.
     *
     * @throws IllegalStateException if no output path has been set
     */
    public void prepare() {
        if (outputPath == null) {
            throw new IllegalStateException("The output path must be set first!");
        }
        videoFormat = null;
        rotationDegrees = 0;
        frameMaxInputSize = DEFAULT_FRAME_MAX_INPUT_SIZE;
        muxerStarted = false;
        try {
            audioFormat = MediaFormat.createAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE, sampleRate, channelCount);
            audioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC);
            audioFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitrate);

            if (videoPath != null) {
                videoExtractor = new MediaExtractor();
                videoExtractor.setDataSource(videoPath);
                videoFormat = findVideoFormat(videoExtractor);
                if (videoFormat == null) {
                    Log.w(TAG, "No video track found in " + videoPath + "; the output will contain audio only");
                }
            }

            mediaCodec = MediaCodec.createEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
            mediaCodec.configure(audioFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
            mediaCodec.start();
            codecInputBuffers = mediaCodec.getInputBuffers();
            codecOutputBuffers = mediaCodec.getOutputBuffers();
            audioBufferInfo = new MediaCodec.BufferInfo();

            mediaMuxer = new MediaMuxer(outputPath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
            if (videoFormat != null) {
                videoTrackId = mediaMuxer.addTrack(videoFormat);
                mediaMuxer.setOrientationHint(rotationDegrees);
            }

            totalBytesRead = 0;
            presentationTimeUs = 0;
        } catch (IOException e) {
            Log.e(TAG, "Exception while initializing StreamRecordingMuxer", e);
            // Do not leave a half-initialised instance holding native resources.
            releaseResources();
        }
    }

    /**
     * Signals end of stream to the encoder, writes its remaining output and
     * releases the codec, the muxer and the extractor. Resources are released
     * even if finishing the stream throws.
     */
    public void stop() {
        Log.d(TAG, "Stopping StreamRecordingMuxer");
        if (mediaCodec == null || mediaMuxer == null) {
            Log.w(TAG, "stop() called without a successful prepare()");
            releaseResources();
            return;
        }
        try {
            handleEndOfStream();
        } finally {
            releaseResources();
        }
    }

    /**
     * Encodes the whole PCM stream and then copies the video track (if one was
     * found) into the muxer. The stream is closed before this method returns.
     *
     * @param inputStream raw PCM audio
     * @throws IOException           if reading the stream fails
     * @throws IllegalStateException if {@link #prepare()} has not completed successfully
     */
    public void start(InputStream inputStream) throws IOException {
        if (mediaCodec == null || mediaMuxer == null) {
            throw new IllegalStateException("prepare() must complete successfully before start()");
        }
        Log.d(TAG, "Starting encoding of InputStream");
        encodeAudioPCM(inputStream);
        Log.d(TAG, "Finished encoding of InputStream");
        if (videoFormat != null) {
            writeVideoToMuxer();
        }
    }

    /** Queues the end-of-stream marker on the encoder and writes everything it still produces. */
    private void handleEndOfStream() {
        boolean endOfStreamQueued = false;
        for (int attempt = 0; attempt < EOS_QUEUE_MAX_ATTEMPTS && !endOfStreamQueued; attempt++) {
            int inputBufferIndex = mediaCodec.dequeueInputBuffer(CODEC_TIMEOUT);
            if (inputBufferIndex >= 0) {
                mediaCodec.queueInputBuffer(inputBufferIndex, 0, 0, presentationTimeUs,
                        MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                endOfStreamQueued = true;
            } else {
                // All input buffers are busy: consume output so the codec can recycle one.
                drainEncoder(false);
            }
        }
        if (!endOfStreamQueued) {
            Log.w(TAG, "Could not queue end of stream: no codec input buffer became available");
        }
        // Only wait for the end-of-stream output if the marker was actually delivered.
        drainEncoder(endOfStreamQueued);
    }

    /**
     * Returns the format of the first track whose MIME type starts with
     * {@code video/}, recording its index, maximum sample size and rotation;
     * returns {@code null} if the source has no such track.
     */
    private MediaFormat findVideoFormat(MediaExtractor extractor) {
        int trackCount = extractor.getTrackCount();
        for (int i = 0; i < trackCount; i++) {
            MediaFormat format = extractor.getTrackFormat(i);
            Log.d(TAG, "Video Format " + format.toString());
            String mimeType = format.getString(MediaFormat.KEY_MIME);
            if (mimeType == null || !mimeType.startsWith(VIDEO_MIME_PREFIX)) {
                continue;
            }
            videoTrackIndex = i;
            // Both keys are optional; reading a missing key with getInteger() throws.
            frameMaxInputSize = format.containsKey(MediaFormat.KEY_MAX_INPUT_SIZE)
                    ? format.getInteger(MediaFormat.KEY_MAX_INPUT_SIZE)
                    : DEFAULT_FRAME_MAX_INPUT_SIZE;
            rotationDegrees = format.containsKey(MediaFormat.KEY_ROTATION)
                    ? format.getInteger(MediaFormat.KEY_ROTATION)
                    : 0;
            return format;
        }
        return null;
    }

    /**
     * Copies every sample of the selected video track into the muxer without
     * re-encoding it.
     *
     * @throws IllegalStateException if the muxer has not been started yet, which
     *                               happens when the encoder has not reported its
     *                               output format by the time the audio is consumed
     */
    private void writeVideoToMuxer() {
        if (!muxerStarted) {
            throw new IllegalStateException(
                    "Cannot write video: the muxer is not started because the audio encoder produced no output format");
        }
        ByteBuffer buffer = ByteBuffer.allocate(frameMaxInputSize);
        MediaCodec.BufferInfo videoBufferInfo = new MediaCodec.BufferInfo();
        videoExtractor.unselectTrack(videoTrackIndex);
        videoExtractor.selectTrack(videoTrackIndex);
        while (true) {
            buffer.clear();
            int sampleSize = videoExtractor.readSampleData(buffer, 0);
            if (sampleSize < 0) {
                videoExtractor.unselectTrack(videoTrackIndex);
                break;
            }
            videoBufferInfo.offset = 0;
            videoBufferInfo.size = sampleSize;
            videoBufferInfo.presentationTimeUs = videoExtractor.getSampleTime();
            // Extractor sample flags and codec buffer flags are different bit sets;
            // translate the sync bit explicitly instead of copying the raw value.
            boolean isSyncSample = (videoExtractor.getSampleFlags() & MediaExtractor.SAMPLE_FLAG_SYNC) != 0;
            videoBufferInfo.flags = isSyncSample ? MediaCodec.BUFFER_FLAG_KEY_FRAME : 0;
            mediaMuxer.writeSampleData(videoTrackId, buffer, videoBufferInfo);
            videoExtractor.advance();
        }
    }

    /**
     * Feeds the PCM stream to the encoder in batches, writing encoder output
     * to the muxer after each batch. The stream is always closed, including
     * when encoding fails.
     */
    private void encodeAudioPCM(InputStream is) throws IOException {
        try {
            byte[] tempBuffer = new byte[2 * sampleRate];
            // Bytes per PCM frame (one sample for every channel).
            int bytesPerFrame = BYTES_PER_SAMPLE * Math.max(1, channelCount);
            boolean hasMoreData = true;
            while (hasMoreData) {
                int inputBufferIndex = 0;
                int currentBatchRead = 0;
                while (inputBufferIndex >= 0 && hasMoreData && currentBatchRead <= 50 * sampleRate) {
                    inputBufferIndex = mediaCodec.dequeueInputBuffer(CODEC_TIMEOUT);
                    if (inputBufferIndex < 0) {
                        continue; // no free input buffer: leave the batch and drain output
                    }
                    ByteBuffer buffer = codecInputBuffers[inputBufferIndex];
                    buffer.clear();
                    // Never request more than either buffer can hold.
                    int maxBytes = Math.min(tempBuffer.length, buffer.remaining());
                    int bytesRead = is.read(tempBuffer, 0, maxBytes);
                    if (bytesRead == -1) {
                        // Hand the buffer back empty; end of stream is signalled in stop().
                        mediaCodec.queueInputBuffer(inputBufferIndex, 0, 0, presentationTimeUs, 0);
                        hasMoreData = false;
                    } else {
                        totalBytesRead += bytesRead;
                        currentBatchRead += bytesRead;
                        buffer.put(tempBuffer, 0, bytesRead);
                        mediaCodec.queueInputBuffer(inputBufferIndex, 0, bytesRead, presentationTimeUs, 0);
                        // Timestamp of the next buffer = frames consumed so far / sample rate.
                        // Frames, not samples: the channel count must be part of the divisor.
                        presentationTimeUs = MICROS_PER_SECOND * (totalBytesRead / bytesPerFrame) / sampleRate;
                    }
                }
                writeAudioOutputs();
            }
        } finally {
            is.close();
        }
    }

    /** Writes the encoder output that is currently available to the muxer. */
    private void writeAudioOutputs() {
        drainEncoder(false);
    }

    /**
     * Pulls encoded audio from the codec and writes it to the muxer.
     *
     * @param untilEndOfStream when {@code false}, returns as soon as the codec has
     *                         no output ready; when {@code true}, keeps polling
     *                         until the end-of-stream buffer is seen or
     *                         {@link #EOS_DRAIN_MAX_IDLE_POLLS} consecutive polls
     *                         time out
     */
    private void drainEncoder(boolean untilEndOfStream) {
        int idlePolls = 0;
        while (true) {
            int outputBufferIndex = mediaCodec.dequeueOutputBuffer(audioBufferInfo, CODEC_TIMEOUT);
            if (outputBufferIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
                idlePolls++;
                if (!untilEndOfStream || idlePolls >= EOS_DRAIN_MAX_IDLE_POLLS) {
                    return;
                }
                continue;
            }
            idlePolls = 0;
            if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                // The cached array is stale after this event and must be re-fetched.
                codecOutputBuffers = mediaCodec.getOutputBuffers();
            } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
                startMuxerWithAudioTrack();
            } else if (outputBufferIndex >= 0) {
                boolean endOfStream = (audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0;
                writeEncodedAudio(outputBufferIndex);
                if (endOfStream) {
                    return;
                }
            }
        }
    }

    /** Registers the encoder's output format as the audio track and starts the muxer, once. */
    private void startMuxerWithAudioTrack() {
        if (muxerStarted) {
            // Tracks cannot be added to a running muxer.
            Log.w(TAG, "Ignoring audio format change after the muxer was started");
            return;
        }
        audioFormat = mediaCodec.getOutputFormat();
        audioTrackId = mediaMuxer.addTrack(audioFormat);
        mediaMuxer.start();
        muxerStarted = true;
    }

    /** Writes one encoder output buffer to the muxer (if it carries audio data) and releases it. */
    private void writeEncodedAudio(int outputBufferIndex) {
        boolean isCodecConfig = (audioBufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0;
        // Codec-config data already reached the muxer through the track format, and
        // empty buffers (such as the end-of-stream marker) carry nothing to write.
        if (!isCodecConfig && audioBufferInfo.size > 0) {
            if (!muxerStarted) {
                throw new IllegalStateException("Encoder produced audio data before reporting its output format");
            }
            ByteBuffer encodedData = codecOutputBuffers[outputBufferIndex];
            // Set the limit first so that the new position can never exceed it.
            encodedData.limit(audioBufferInfo.offset + audioBufferInfo.size);
            encodedData.position(audioBufferInfo.offset);
            mediaMuxer.writeSampleData(audioTrackId, encodedData, audioBufferInfo);
        }
        mediaCodec.releaseOutputBuffer(outputBufferIndex, false);
    }

    /** Releases codec, muxer and extractor; each one is released even if an earlier one throws. */
    private void releaseResources() {
        try {
            releaseCodec();
        } finally {
            try {
                releaseMuxer();
            } finally {
                releaseExtractor();
            }
        }
    }

    /** Stops and releases the audio encoder, if one exists. */
    private void releaseCodec() {
        if (mediaCodec == null) {
            return;
        }
        try {
            mediaCodec.stop();
        } finally {
            mediaCodec.release();
            mediaCodec = null;
        }
    }

    /** Stops the muxer if it was started, then releases it. */
    private void releaseMuxer() {
        if (mediaMuxer == null) {
            return;
        }
        try {
            // Stopping a muxer that was never started is an error, so skip stop() in that case.
            if (muxerStarted) {
                mediaMuxer.stop();
            }
        } finally {
            mediaMuxer.release();
            mediaMuxer = null;
            muxerStarted = false;
        }
    }

    /** Releases the video extractor, if one exists. */
    private void releaseExtractor() {
        if (videoExtractor != null) {
            videoExtractor.release();
            videoExtractor = null;
        }
    }
}
I've finally managed to find the answer, and it was unrelated to the actual muxer code: it turns out that the presentation times were miscalculated when the audio file was created.