http-live-streaming aac libavcodec libavformat avcodec

avcodec config for AAC into HLS/MPEG-TS ([mpegts @ 0x7fc4c00343c0] AAC bitstream not in ADTS format and extradata missing)

We're encoding video into H264 and raw PCM samples into AAC for HLS streaming. The video works fine, but we're having trouble configuring the AAC encoder in libavcodec.

This SO question says:

There are two ways to put AAC into transport stream.

1. Using ADTS syntax (MPEG2-style).

In such a case, the PMT's stream_type should be specified as 0x0F (ISO/IEC 13818-7 Audio with ADTS transport syntax).

So, you are limited to using "old" (MPEG2) AAC versions only, without SBR and PS.

2. Using LATM+LOAS/AudioSyncStream syntax (MPEG4-style).

In such a case, the PMT's stream_type should be specified as 0x11 (ISO/IEC 14496-3 Audio with the LATM transport syntax).

And you can use all the force of "new" (MPEG4) AAC features, including SBR and PS.

Furthermore, DVB standard ETSI TS 101 154 demands: HEv1/HEv2 AAC shall be transmitted using LATM syntax.

But after a lot of searching I cannot find any documentation on how to do either of these. What is missing from the configuration below to get the encoded audio with either ADTS or LATM before it is passed to the MPEG-TS mux (for output to HLS)?

The current code to set up the AAC codec gives the error [mpegts @ 0x7fc4c00343c0] AAC bitstream not in ADTS format and extradata missing

The AAC encoder setup (error checking removed for brevity)

/// Set up Encoder ///
// Look up the AAC encoder and allocate a codec context for it.
mpAudioCodec = avcodec_find_encoder(AV_CODEC_ID_AAC);
mpAudioCodecContext = avcodec_alloc_context3(mpAudioCodec);

mpAudioCodecContext->bit_rate       = DEFAULT_AUD_BITRATE;
mpAudioCodecContext->sample_rate    = DEFAULT_AUD_SAMPLE_RATE;
mpAudioCodecContext->channel_layout = DEFAULT_AUD_CHAN_LAYOUT;
mpAudioCodecContext->channels       = 2;
mpAudioCodecContext->sample_fmt     = AV_SAMPLE_FMT_FLTP; // S16 not supported. Must convert

// The frames handed to this encoder carry a pts counted in samples, so
// declare the matching time base explicitly instead of relying on whatever
// the library picks when the field is left unset.
mpAudioCodecContext->time_base      = (AVRational) { 1, DEFAULT_AUD_SAMPLE_RATE };

// Put the codec configuration into extradata instead of in-band, so it can
// be handed to the muxer together with the rest of the stream parameters.
mpAudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

rc = avcodec_open2(mpAudioCodecContext, mpAudioCodec, NULL);

HLS MUX SETUP

avformat_alloc_output_context2(&mpOutputMux, 0, "hls", path.c_str());

// VIDEO TRACK
mpVideoTrack = avformat_new_stream(mpOutputMux, 0);
mpVideoTrack->id = 0;
mpVideoTrack->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
mpVideoTrack->codecpar->codec_id   = AV_CODEC_ID_H264;

mpVideoTrack->time_base      = (AVRational) { 1,          mFrameRate };
mpVideoTrack->avg_frame_rate = (AVRational) { mFrameRate, 1          };

// AUDIO TRACK
mpAudioTrack = avformat_new_stream(mpOutputMux, 0);
mpAudioTrack->id = 1;

mpAudioTrack->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
mpAudioTrack->codecpar->codec_id    = DEFAULT_AUDIO_CODEC;
mpAudioTrack->codecpar->sample_rate = mpAudioCodecContext->sample_rate;

mpAudioTrack->time_base.den = mpAudioCodecContext->sample_rate;
mpAudioTrack->time_base.num = 1;

AVDictionary *hlsOptions = NULL;
av_dict_set(&hlsOptions,     "hls_segment_type",   "mpegts", 0);
av_dict_set(&hlsOptions,     "segment_list_type",  "m3u8",   0);
av_dict_set_int(&hlsOptions, "hls_list_size",      mPlaylistSize,  0);
av_dict_set_int(&hlsOptions, "hls_time",           mChunkDurSec,   0);
av_dict_set(&hlsOptions,     "hls_flags",          "delete_segments", 0);
av_dict_set(&hlsOptions,     "hls_segment_filename", segPath.c_str(),   0);

av_dict_set_int(&hlsOptions, "reference_stream",   mpVideoTrack->index, 0);
av_dict_set(&hlsOptions,     "segment_list_flags", "cache+live", 0);

int ret = avformat_write_header(mpOutputMux, &hlsOptions);

Encode Loop

int bytesCopied = mAudEsBuffer.popData(mpPcmS16Buf, mpPcmAudioFrame->nb_samples);

// resample to float
int rc = swr_convert(mpAudioResampleCtx, mpPcmAudioFrame->data, mpPcmAudioFrame->nb_samples, (const uint8_t**) &mpPcmS16Buf, mpPcmAudioFrame->nb_samples);

/* Set a timestamp based on the sample rate for the container.
   A frame's pts is the running sample count *before* this frame is added,
   so assign first and advance the counter afterwards. */
mpPcmAudioFrame->pts = mCurAudPts;
mCurAudPts += mpPcmAudioFrame->nb_samples;

// send frame for encoding to AAC
rc = avcodec_send_frame(mpAudioCodecContext, mpPcmAudioFrame);

/* read all the available output packets (in general there may be any number of them) */
while (rc >= 0)
{
    /* Set the packet data and size so that it is recognized as being empty. */
    av_init_packet(mpEncAudioPacket);
    mpEncAudioPacket->data = NULL;
    mpEncAudioPacket->size = 0;

    rc = avcodec_receive_packet(mpAudioCodecContext, mpEncAudioPacket);
    if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF)
    {
        // Not a failure: the encoder has no more output until it is fed
        // another frame (EAGAIN) or it has been fully drained (EOF).
        break;
    }
    if (rc < 0)
    {
        printf("TqHlsLib::readAndMuxAudio() - Error encoding audio frame: %s\n", av_make_error_string(mpErr, TQERRLEN, rc));
        return HLS_DEC_ERROR;
    }

    TRACE(("%T %t TqHlsLib::readAndMuxAudio() - Got an encoded audio packet. %u bytes\n",
        mpEncAudioPacket->size ));

    /* Rescale output packet timestamps to the stream time base. The pts
       values fed to the encoder above are counted in samples, so the source
       time base is 1/sample_rate. Using the stream time base for both
       arguments would make this call a no-op. */
    AVRational sampleTimeBase = { 1, mpAudioCodecContext->sample_rate };
    av_packet_rescale_ts(mpEncAudioPacket, sampleTimeBase, mpAudioTrack->time_base);
    mpEncAudioPacket->stream_index = mpAudioTrack->index;

    /* Write the compressed frame to the media file. */
    rc = av_interleaved_write_frame(mpOutputMux, mpEncAudioPacket);
    if (rc < 0)
    {
        // Report the code returned by this write call (rc).
        fprintf(stderr, "TqHlsLib::addVideoH264Packet - Error while writing audio packet: %s\n",
            av_make_error_string(mpErr, TQERRLEN, rc));

        // return some error here
    }
    av_packet_unref(mpEncAudioPacket);

}

The Output

[mpegts @ 0x7fb280144e00] AAC bitstream not in ADTS format and extradata missing
20:24:52.327418 24388 TqHlsLib::readAndMuxAudio() - Got an encoded audio packet. 185 bytes
[mpegts @ 0x7fb280144e00] AAC bitstream not in ADTS format and extradata missing
20:24:52.372975 24388 TqHlsLib::readAndMuxAudio() - Got an encoded audio packet. 188 bytes
[mpegts @ 0x7fb280144e00] AAC bitstream not in ADTS format and extradata missing

Solution

After a lot of searching I've found two ways to add the ADTS header to audio in order to properly MUX an MPEG-TS.

The first was to set up a separate AVFormatContext for ADTS, create an AVStream with the encoded AAC packets and then use send_frame / receive_frame to get the same AAC data but this time with the ADTS header applied.

That added a lot of complexity and latency.

In the end, I just manually added an ADTS header to each encoded AAC packet before passing to av_interleaved_write_frame.

To help those in the future, here's the code:

/*
    ADTS HEADER: 7 Bytes. See ISO 13818-7 (2004)

    AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP

    A - Sync 0xFFFx
    B   1   MPEG Version: 0 for MPEG-4, 1 for MPEG-2
    C   2   Layer: always 0
    D   1   protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC
    E   2   profile, the MPEG-4 Audio Object Type minus 1
    F   4   MPEG-4 Sampling Frequency Index (15 is forbidden)
    G   1   private bit, guaranteed never to be used by MPEG, set to 0 when encoding, ignore when decoding
    H   3   MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE)
    I   1   originality, set to 0 when encoding, ignore when decoding
    J   1   home, set to 0 when encoding, ignore when decoding
    K   1   copyrighted id bit, the next bit of a centrally registered copyright identifier, set to 0 when encoding, ignore when decoding
    L   1   copyright id start, signals that this frame's copyright id bit is the first bit of the copyright id, set to 0 when encoding, ignore when decoding
    M   13  frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
    O   11  Buffer fullness
    P   2   Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame
    Q   16  CRC if protection absent is 0

*/
/**
 * Prefix one raw AAC frame with a 7-byte (CRC-less) ADTS header.
 *
 * @param inBuf     raw AAC frame bytes
 * @param inLen     number of bytes in inBuf; the frame-length field is only
 *                  13 bits wide, so 7 + inLen must not exceed 8191
 * @param adtsInfo  stream description: mpegVersion (2 selects the MPEG-2 ID
 *                  bit, any other value the MPEG-4 one), objectType,
 *                  sampleRate and channelConfig
 * @param outVec    cleared, then filled with the header followed by the
 *                  unmodified payload
 *
 * A sample rate that is not found in ADTS_SAMPLE_RATES falls back to
 * index 4 (44100 Hz).
 */
void
addADTSHeader(uint8_t *inBuf, size_t inLen, const ADTSInfo &adtsInfo, std::vector<uint8_t> &outVec)
{
    const size_t headerLen = 7;                // short header: no CRC follows
    const size_t frameLen  = headerLen + inLen; // value of the 13-bit M field

    outVec.clear();
    outVec.reserve(frameLen);

    uint8_t b;

    // 0: Sync I
    outVec.push_back(0xFF);

    // 1: Sync II + BCCD
    b  = 0xF0;
    b |= ((adtsInfo.mpegVersion == 2) ? 1 : 0) << 3; // B: 1 = MPEG-2, 0 = MPEG-4
    b |= 1;                                          // D: no protection (no CRC)
    outVec.push_back(b);

    // 2: EEFFFFGH
    int sampleFreqIdx = 4; // default 44100
    for (int i = 0; ADTS_SAMPLE_RATES[i] != LAST_INDEX; i++)
    {
        if (ADTS_SAMPLE_RATES[i] == adtsInfo.sampleRate)
        {
            sampleFreqIdx = i;
            break;
        }
    }

    b  = (adtsInfo.objectType - 1) << 6;       // EE
    b |= sampleFreqIdx << 2;                   // FFFF (G, the private bit, stays 0)
    b |= (adtsInfo.channelConfig & 0x07) >> 2; // high bit of channel config
    outVec.push_back(b);

    // 3: HHIJKLMM
    b  = (adtsInfo.channelConfig & 0x03) << 6; // low two bits of channel config
    b |= (frameLen >> 11) & 0x03;              // high two bits of frame length
    outVec.push_back(b);

    // 4: MMMMMMMM
    b  = frameLen >> 3;                        // Frame length middle
    outVec.push_back(b);

    // 5: MMMOOOOO
    b  = (frameLen & 0x07) << 5;               // low three bits of frame length
    b |= 0x1F;                                 // buffer fullness 0x7FF VBR -> N/A
    outVec.push_back(b);

    // 6: OOOOOOPP
    b  = 0x3F << 2;                            // buffer fullness 0x7FF VBR -> N/A
    b |= 0;                                    // 1 AAC frame per ADTS
    outVec.push_back(b);

    outVec.insert(outVec.end(), inBuf, inBuf + inLen);
}