iosaudioaudiounitopus

AudioUnit + Opus codec = crackle issue


I am creating a voip app for iOS in objective-c. Currently i am trying to create the audio part: recording the audio data from microphone, encoding with Opus, decoding, and then playing. For the recording and playing i use AudioUnit. Also i made a buffer implementation which allocates places of memory each with initially set size. There are three main methods: - setBufferSize - for setting buffer's sub allocated spaces. - writeDataToBuffer - for creating new space(if needed), and filling data into current writing space. - readDataFromBuffer - read data from current reading space.

I use the buffer for storing the audio data there. It works good. I've tested it. Also if i try to use it without Opus just reading audio data, storing it into the buffer, reading from the buffer and then playing, everything works great. But the problem comes when i include opus. Actually it encodes and decodes the audio data, but the quality is not so good and there are some crackle as well. I was wondering what am i doing wrong? Here are pieces of my code:

AudioUnit:

OSStatus status;


m_sAudioDescription.componentType = kAudioUnitType_Output;
m_sAudioDescription.componentSubType = kAudioUnitSubType_VoiceProcessingIO/*kAudioUnitSubType_RemoteIO*/;
m_sAudioDescription.componentFlags = 0;
m_sAudioDescription.componentFlagsMask = 0;
m_sAudioDescription.componentManufacturer = kAudioUnitManufacturer_Apple;

AudioComponent inputComponent = AudioComponentFindNext(NULL, &m_sAudioDescription);

status = AudioComponentInstanceNew(inputComponent, &m_audioUnit);


// Enable IO for recording
UInt32 flag = 1;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_EnableIO,
                              kAudioUnitScope_Input,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &flag,
                              sizeof(flag));

// Enable IO for playback
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_EnableIO,
                              kAudioUnitScope_Output,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &flag,
                              sizeof(flag));

// Describe format
m_sAudioFormat.mSampleRate          = 48000.00;//48000.00;/*44100.00*/;
m_sAudioFormat.mFormatID            = kAudioFormatLinearPCM;
m_sAudioFormat.mFormatFlags         = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked/* | kAudioFormatFlagsCanonical*/;
m_sAudioFormat.mFramesPerPacket     = 1;
m_sAudioFormat.mChannelsPerFrame    = 1;
m_sAudioFormat.mBitsPerChannel      = 16; //8 * bytesPerSample
m_sAudioFormat.mBytesPerFrame       = /*(UInt32)bytesPerSample;*/2; //bitsPerChannel / 8 * channelsPerFrame
m_sAudioFormat.mBytesPerPacket      = 2; //bytesPerFrame * framesPerPacket


// Apply format
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_StreamFormat,
                              kAudioUnitScope_Output,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &m_sAudioFormat,
                              sizeof(m_sAudioFormat));

status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_StreamFormat,
                              kAudioUnitScope_Input,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &m_sAudioFormat,
                              sizeof(m_sAudioFormat));


// Set input callback
AURenderCallbackStruct callbackStruct;
callbackStruct.inputProc = inputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_SetInputCallback,
                              kAudioUnitScope_Global,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &callbackStruct,
                              sizeof(callbackStruct));

// Set output callback
callbackStruct.inputProc = outputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_SetRenderCallback,
                              kAudioUnitScope_Global,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &callbackStruct,
                              sizeof(callbackStruct));

//Enable Echo cancelation:
this->_setEchoCancelation(true);

//Enable Automatic Gain control:
this->_setAGC(false);

// Initialise
status = AudioUnitInitialize(m_audioUnit);

return noErr;

Input buffer allocation and setting the size of storing buffers:

void VoipAudio::_allocBuffer()
{
   UInt32 numFramesPerBuffer;
   UInt32 size = sizeof(/*VoipUInt32*/VoipInt16);
   AudioUnitGetProperty(m_audioUnit,
                     kAudioUnitProperty_MaximumFramesPerSlice,
                     kAudioUnitScope_Global,
                     VOIP_AUDIO_OUTPUT_ELEMENT,                         &numFramesPerBuffer,                         &siz    

   UInt32 inputBufferListSize = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * m_sAudioFormat.mChannelsPerFrame);
   inputBuffer = (AudioBufferList *)malloc(inputBufferListSize);
   inputBuffer->mNumberBuffers = m_sAudioFormat.mChannelsPerFrame;

   //pre-malloc buffers for AudioBufferLists
   for(VoipUInt32 tmp_int1 = 0; tmp_int1 < inputBuffer->mNumberBuffers; tmp_int1++)
   {
      inputBuffer->mBuffers[tmp_int1].mNumberChannels = 1;
      inputBuffer->mBuffers[tmp_int1].mDataByteSize = 2048;
      inputBuffer->mBuffers[tmp_int1].mData = malloc(2048);
      memset(inputBuffer->mBuffers[tmp_int1].mData, 0, 2048);
   }

   this->m_oAudioBuffer = new VoipBuffer();
   this->m_oAudioBuffer->setBufferSize(2048);

   this->m_oAudioReadBuffer = new VoipBuffer();
   this->m_oAudioReadBuffer->setBufferSize(2880);
 }

Record callback:

this->m_oAudioReadBuffer->writeDataToBuffer(samples, samplesSize);
void* tmp_buffer = this->m_oAudioReadBuffer->readDataFromBuffer();
if (tmp_buffer != nullptr)
{
   sVoipAudioCodecOpusEncodedResult* encodedSamples = VoipAudioCodecs::Opus_Encode((VoipInt16*)tmp_buffer, 2880);

   sVoipAudioCodecOpusDecodedResult* decodedSamples = VoipAudioCodecs::Opus_Decode(encodedSamples->m_data, encodedSamples->m_dataSize);


   this->m_oAudioBuffer->writeDataToBuffer(decodedSamples->m_data, decodedSamples->m_dataSize);

   free(encodedSamples->m_data);
   free(encodedSamples);
   free(decodedSamples->m_data);
   free(decodedSamples);
}

Playing callback:

void* tmp_buffer = this->m_oAudioBuffer->readDataFromBuffer();

if (tmp_buffer != nullptr)
{
   memset(buffer->mBuffers[0].mData, 0, 2048);
   memcpy(buffer->mBuffers[0].mData, tmp_buffer, 2048);
   buffer->mBuffers[0].mDataByteSize = 2048;
} else {
   memset(buffer->mBuffers[0].mData, 0, 2048);
   buffer->mBuffers[0].mDataByteSize = 2048;
}

Opus Init Code:

int _error = 0;

VoipAudioCodecs::m_oEncoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &_error);
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to create an encoder: %s\n", opus_strerror(_error));

    return;
}

_error = opus_encoder_ctl(VoipAudioCodecs::m_oEncoder, OPUS_SET_BITRATE(BITRATE/*OPUS_BITRATE_MAX*/));
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to set bitrate: %s\n", opus_strerror(_error));

    return;
}

VoipAudioCodecs::m_oDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &_error);
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to create decoder: %s\n", opus_strerror(_error));

    return;
}

Opus encode/decode:

sVoipAudioCodecOpusEncodedResult* VoipAudioCodecs::Opus_Encode(VoipInt16* number, int samplesCount)
{
   unsigned char cbits[MAX_PACKET_SIZE];
   VoipInt32 nbBytes;

   nbBytes = opus_encode(VoipAudioCodecs::m_oEncoder, number, FRAME_SIZE, cbits, MAX_PACKET_SIZE);
   if (nbBytes < 0)
   {
      fprintf(stderr, "VoipAudioCodecs error: encode failed: %s\n", opus_strerror(nbBytes));

      return nullptr;
   }    

   sVoipAudioCodecOpusEncodedResult* result = (sVoipAudioCodecOpusEncodedResult* )malloc(sizeof(sVoipAudioCodecOpusEncodedResult));

   result->m_data = (unsigned char*)malloc(nbBytes);
   memcpy(result->m_data, cbits, nbBytes);
   result->m_dataSize = nbBytes;

   return result;
}

sVoipAudioCodecOpusDecodedResult* VoipAudioCodecs::Opus_Decode(void* encoded, VoipInt32 nbBytes)
{
    VoipInt16 decodedPacket[MAX_FRAME_SIZE];


    int frame_size = opus_decode(VoipAudioCodecs::m_oDecoder, (const unsigned char*)encoded, nbBytes, decodedPacket, MAX_FRAME_SIZE, 0);

    if (frame_size < 0)
    {
       fprintf(stderr, "VoipAudioCodecs error: decoder failed: %s\n", opus_strerror(frame_size));

       return nullptr;
    }

    sVoipAudioCodecOpusDecodedResult* result = (sVoipAudioCodecOpusDecodedResult* )malloc(sizeof(sVoipAudioCodecOpusDecodedResult));

    result->m_data = (VoipInt16*)malloc(frame_size / sizeof(VoipInt16));
    memcpy(result->m_data, decodedPacket, (frame_size / sizeof(VoipInt16)));
    result->m_dataSize = frame_size / sizeof(VoipInt16);

    return result;
 }

Here are some constants i use:

#define FRAME_SIZE 2880 //120, 240, 480, 960, 1920, 2880 
#define SAMPLE_RATE 48000
#define CHANNELS 1
#define APPLICATION OPUS_APPLICATION_VOIP//OPUS_APPLICATION_AUDIO
#define BITRATE 64000
#define MAX_FRAME_SIZE 4096
#define MAX_PACKET_SIZE (3*1276)

Can you help me please?


Solution

  • Your audio call back time may need increased. Try increasing your session setPreferredIOBufferDuration time. I have used opus on iOS and have measured the decoding time. It takes 2 to 3 ms to decode about 240 frames of data. There is a good chance you are missing your subsequent callbacks because it is taking to long to decode the audio.