androidopengl-es-2.0android-mediacodecmediaextractor

Recoding one H.264 video to another using opengl surfaces is very slow on my android


I'm developing function of translating one video into another with additional effects for each frame. I decided to use opengl-es for applying effects on each frame. My input and output videos are in MP4 using H.264 codec. I use MediaCodec API (android api 18+) for decoding H.264 into the opengl texture, then draw on the surface using this texture with my shader. I thought that using MediaCodec with H.264 will do hardware decoding on android and it will be fast. But appeared that it is not. Recoding small 432x240 15 seconds video consumed 28 seconds of total time!

Please, take a look at my code + profile information and share some advice, critics if I'm doing something wrong.

My code:

    private void editVideoFile()
{
    if (VERBOSE)
    {
        Log.d(TAG, "editVideoFile " + mWidth + "x" + mHeight);
    }

    MediaCodec decoder = null;

    MediaCodec encoder = null;
    InputSurface inputSurface = null;
    OutputSurface outputSurface = null;
    try
    {
        File inputFile = new File(FILES_DIR, INPUT_FILE);   // must be an absolute path
        // The MediaExtractor error messages aren't very useful.  Check to see if the input
        // file exists so we can throw a better one if it's not there.
        if (!inputFile.canRead())
        {
            throw new FileNotFoundException("Unable to read " + inputFile);
        }

        extractor = new MediaExtractor();
        extractor.setDataSource(inputFile.toString());
        int trackIndex = inVideoTrackIndex = selectTrack(extractor);
        if (trackIndex < 0)
        {
            throw new RuntimeException("No video track found in " + inputFile);
        }
        extractor.selectTrack(trackIndex);

        MediaFormat inputFormat = extractor.getTrackFormat(trackIndex);
        mWidth = inputFormat.getInteger(MediaFormat.KEY_WIDTH);
        mHeight = inputFormat.getInteger(MediaFormat.KEY_HEIGHT);

        if (VERBOSE)
        {
            Log.d(TAG, "Video size is " + mWidth + "x" + mHeight);
        }

        // Create an encoder format that matches the input format.  (Might be able to just
        // re-use the format used to generate the video, since we want it to be the same.)

        MediaFormat outputFormat = MediaFormat.createVideoFormat(MIME_TYPE, mWidth, mHeight);
        outputFormat.setInteger(MediaFormat.KEY_COLOR_FORMAT,
                MediaCodecInfo.CodecCapabilities.COLOR_FormatSurface);
        outputFormat.setInteger(MediaFormat.KEY_BIT_RATE,
                getFormatValue(inputFormat, MediaFormat.KEY_BIT_RATE, BIT_RATE));
        outputFormat.setInteger(MediaFormat.KEY_FRAME_RATE,
                getFormatValue(inputFormat, MediaFormat.KEY_FRAME_RATE, FRAME_RATE));
        outputFormat.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL,
                getFormatValue(inputFormat,MediaFormat.KEY_I_FRAME_INTERVAL, IFRAME_INTERVAL));
        try
        {
            encoder = MediaCodec.createEncoderByType(MIME_TYPE);
        }
        catch (IOException iex)
        {
            throw new RuntimeException(iex);
        }
        encoder.configure(outputFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
        inputSurface = new InputSurface(encoder.createInputSurface());
        inputSurface.makeCurrent();
        encoder.start();

        // Output filename.  Ideally this would use Context.getFilesDir() rather than a
        // hard-coded output directory.
        String outputPath = new File(OUTPUT_DIR,
                "transformed-" + mWidth + "x" + mHeight + ".mp4").toString();
        Log.d(TAG, "output file is " + outputPath);


        // Create a MediaMuxer.  We can't add the video track and start() the muxer here,
        // because our MediaFormat doesn't have the Magic Goodies.  These can only be
        // obtained from the encoder after it has started processing data.
        //
        // We're not actually interested in multiplexing audio.  We just want to convert
        // the raw H.264 elementary stream we get from MediaCodec into a .mp4 file.
        try
        {
            mMuxer = new MediaMuxer(outputPath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
        }
        catch (IOException ioe)
        {
            throw new RuntimeException("MediaMuxer creation failed", ioe);
        }

        mTrackIndex = -1;
        mMuxerStarted = false;


        // OutputSurface uses the EGL context created by InputSurface.
        try
        {
            decoder = MediaCodec.createDecoderByType(MIME_TYPE);
        }
        catch (IOException iex)
        {
            throw new RuntimeException(iex);
        }
        outputSurface = new OutputSurface();
        outputSurface.changeFragmentShader(FRAGMENT_SHADER);
        decoder.configure(inputFormat, outputSurface.getSurface(), null, 0);
        decoder.start();

        editVideoData(decoder, outputSurface, inputSurface, encoder);
    }
    catch (Exception ex)
    {
        Log.e(TAG, "Error processing", ex);
        throw new RuntimeException(ex);
    }
    finally
    {
        if (VERBOSE)
        {
            Log.d(TAG, "shutting down encoder, decoder");
        }
        if (outputSurface != null)
        {
            outputSurface.release();
        }
        if (inputSurface != null)
        {
            inputSurface.release();
        }
        if (encoder != null)
        {
            encoder.stop();
            encoder.release();
        }
        if (decoder != null)
        {
            decoder.stop();
            decoder.release();
        }
        if (mMuxer != null)
        {
            mMuxer.stop();
            mMuxer.release();
            mMuxer = null;
        }
    }
}

/**
 * Selects the video track, if any.
 *
 * @return the track index, or -1 if no video track is found.
 */
private int selectTrack(MediaExtractor extractor)
{
    // Select the first video track we find, ignore the rest.
    int numTracks = extractor.getTrackCount();
    for (int i = 0; i < numTracks; i++)
    {
        MediaFormat format = extractor.getTrackFormat(i);
        String mime = format.getString(MediaFormat.KEY_MIME);
        if (mime.startsWith("video/"))
        {
            if (VERBOSE)
            {
                Log.d(TAG, "Extractor selected track " + i + " (" + mime + "): " + format);
            }
            return i;
        }
    }

    return -1;
}

/**
 * Edits a stream of video data.
 */
private void editVideoData(MediaCodec decoder,
                           OutputSurface outputSurface, InputSurface inputSurface, MediaCodec encoder)
{
    final int TIMEOUT_USEC = 10000;
    ByteBuffer[] decoderInputBuffers = decoder.getInputBuffers();
    ByteBuffer[] encoderOutputBuffers = encoder.getOutputBuffers();
    MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
    int inputChunk = 0;
    boolean outputDone = false;
    boolean inputDone = false;
    boolean decoderDone = false;
    while (!outputDone)
    {
        if (VERBOSE)
        {
            Log.d(TAG, "edit loop");
        }
        // Feed more data to the decoder.
        if (!inputDone)
        {
            int inputBufIndex = decoder.dequeueInputBuffer(TIMEOUT_USEC);
            if (inputBufIndex >= 0)
            {
                ByteBuffer inputBuf = decoderInputBuffers[inputBufIndex];
                // Read the sample data into the ByteBuffer.  This neither respects nor
                // updates inputBuf's position, limit, etc.
                int chunkSize = extractor.readSampleData(inputBuf, 0);
                if (chunkSize < 0)
                {
                    // End of stream -- send empty frame with EOS flag set.
                    decoder.queueInputBuffer(inputBufIndex, 0, 0, 0L,
                            MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                    inputDone = true;
                    if (VERBOSE)
                    {
                        Log.d(TAG, "sent input EOS");
                    }
                }
                else
                {
                    if (extractor.getSampleTrackIndex() != inVideoTrackIndex)
                    {
                        Log.w(TAG, "WEIRD: got sample from track " +
                                extractor.getSampleTrackIndex() + ", expected " + inVideoTrackIndex);
                    }
                    long presentationTimeUs = extractor.getSampleTime();
                    decoder.queueInputBuffer(inputBufIndex, 0, chunkSize,
                            presentationTimeUs, 0 /*flags*/);
                    if (VERBOSE)
                    {
                        Log.d(TAG, "submitted frame " + inputChunk + " to dec, size=" +
                                chunkSize);
                    }
                    inputChunk++;
                    extractor.advance();
                }
            }
            else
            {
                if (VERBOSE)
                {
                    Log.d(TAG, "input buffer not available");
                }
            }
        }


        // Assume output is available.  Loop until both assumptions are false.
        boolean decoderOutputAvailable = !decoderDone;
        boolean encoderOutputAvailable = true;
        while (decoderOutputAvailable || encoderOutputAvailable)
        {
            // Start by draining any pending output from the encoder.  It's important to
            // do this before we try to stuff any more data in.
            int encoderStatus = encoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
            if (encoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER)
            {
                // no output available yet
                if (VERBOSE)
                {
                    Log.d(TAG, "no output from encoder available");
                }
                encoderOutputAvailable = false;
            }
            else if (encoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED)
            {
                encoderOutputBuffers = encoder.getOutputBuffers();
                if (VERBOSE)
                {
                    Log.d(TAG, "encoder output buffers changed");
                }
            }
            else if (encoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED)
            {
                if (mMuxerStarted)
                {
                    throw new RuntimeException("format changed twice");
                }
                MediaFormat newFormat = encoder.getOutputFormat();
                Log.d(TAG, "encoder output format changed: " + newFormat);

                // now that we have the Magic Goodies, start the muxer
                mTrackIndex = mMuxer.addTrack(newFormat);
                mMuxer.start();
                mMuxerStarted = true;
            }
            else if (encoderStatus < 0)
            {
                throw new RuntimeException("unexpected result from encoder.dequeueOutputBuffer: " + encoderStatus);
            }
            else
            { // encoderStatus >= 0
                ByteBuffer encodedData = encoderOutputBuffers[encoderStatus];
                if (encodedData == null)
                {
                    throw new RuntimeException("encoderOutputBuffer " + encoderStatus + " was null");
                }

                if ((info.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0)
                {
                    // The codec config data was pulled out and fed to the muxer when we got
                    // the INFO_OUTPUT_FORMAT_CHANGED status.  Ignore it.
                    if (VERBOSE)
                    {
                        Log.d(TAG, "ignoring BUFFER_FLAG_CODEC_CONFIG");
                    }
                    info.size = 0;
                }

                // Write the data to the output "file".
                if (info.size != 0)
                {
                    if (!mMuxerStarted)
                    {
                        throw new RuntimeException("muxer hasn't started");
                    }

                    // adjust the ByteBuffer values to match BufferInfo (not needed?)
                    encodedData.position(info.offset);
                    encodedData.limit(info.offset + info.size);

                    mMuxer.writeSampleData(mTrackIndex, encodedData, info);
                    if (VERBOSE)
                    {
                        Log.d(TAG, "sent " + info.size + " bytes to muxer");
                    }
                }
                outputDone = (info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0;
                encoder.releaseOutputBuffer(encoderStatus, false);
            }
            if (encoderStatus != MediaCodec.INFO_TRY_AGAIN_LATER)
            {
                // Continue attempts to drain output.
                continue;
            }
            // Encoder is drained, check to see if we've got a new frame of output from
            // the decoder.  (The output is going to a Surface, rather than a ByteBuffer,
            // but we still get information through BufferInfo.)
            if (!decoderDone)
            {
                int decoderStatus = decoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
                if (decoderStatus == MediaCodec.INFO_TRY_AGAIN_LATER)
                {
                    // no output available yet
                    if (VERBOSE)
                    {
                        Log.d(TAG, "no output from decoder available");
                    }
                    decoderOutputAvailable = false;
                }
                else if (decoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED)
                {
                    //decoderOutputBuffers = decoder.getOutputBuffers();
                    if (VERBOSE)
                    {
                        Log.d(TAG, "decoder output buffers changed (we don't care)");
                    }
                }
                else if (decoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED)
                {
                    // expected before first buffer of data
                    MediaFormat newFormat = decoder.getOutputFormat();
                    if (VERBOSE)
                    {
                        Log.d(TAG, "decoder output format changed: " + newFormat);
                    }
                }
                else if (decoderStatus < 0)
                {
                    throw new RuntimeException("unexpected result from decoder.dequeueOutputBuffer: " + decoderStatus);
                }
                else
                { // decoderStatus >= 0
                    if (VERBOSE)
                    {
                        Log.d(TAG, "surface decoder given buffer "
                                + decoderStatus + " (size=" + info.size + ")");
                    }
                    // The ByteBuffers are null references, but we still get a nonzero
                    // size for the decoded data.
                    boolean doRender = (info.size != 0);
                    // As soon as we call releaseOutputBuffer, the buffer will be forwarded
                    // to SurfaceTexture to convert to a texture.  The API doesn't
                    // guarantee that the texture will be available before the call
                    // returns, so we need to wait for the onFrameAvailable callback to
                    // fire.  If we don't wait, we risk rendering from the previous frame.
                    decoder.releaseOutputBuffer(decoderStatus, doRender);
                    if (doRender)
                    {
                        // This waits for the image and renders it after it arrives.
                        if (VERBOSE)
                        {
                            Log.d(TAG, "awaiting frame");
                        }
                        outputSurface.awaitNewImage();
                        outputSurface.drawImage();
                        // Send it to the encoder.
                        inputSurface.setPresentationTime(info.presentationTimeUs * 1000);
                        if (VERBOSE)
                        {
                            Log.d(TAG, "swapBuffers");
                        }
                        inputSurface.swapBuffers();
                    }
                    if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0)
                    {
                        // forward decoder EOS to encoder
                        if (VERBOSE)
                        {
                            Log.d(TAG, "signaling input EOS");
                        }
                        if (WORK_AROUND_BUGS)
                        {
                            // Bail early, possibly dropping a frame.
                            return;
                        }
                        else
                        {
                            encoder.signalEndOfInputStream();
                        }
                    }
                }
            }
        }
    }
}

And profile information: Profile shows most of the time spent to dequeueOutputBuffer

Tested on Samsung Galaxy Note3 Intl (Qualcom)


Solution

  • Your issue probably is in how you synchronously wait for events on one single thread, with a nonzero timeout.

    You could probably get better throuhput if you lower the timeout. Most of the hardware codecs work with a bit of latency; you can have a good total throughput, but don't expect to have a result (a frame encoded or decoded) immediately.

    Ideally, you would use a zero timeout to check all inputs/outputs of both encoder and decoder, and in case there's no free buffers on either points, wait with a nonzero timeout on e.g. encoder output or decoder output.

    If you can target Android 5.0, with asynchronous mode in MediaCodec, it's much easier to get this done right. See e.g. https://github.com/mstorsjo/android-decodeencodetest for an example on how to do this. See also https://stackoverflow.com/a/35885471/3115956 for a longer discussion on this issue.

    You can also have a look at some similar questions.