c++ ffmpeg hevc

ffmpeg hevc encoding failure


I use FFmpeg to encode raw YUV data as H.265 (HEVC), but the encoded image is always incorrect, as shown below: enter image description here

However, the following command encodes the same data correctly: ffmpeg -f rawvideo -s 480x256 -pix_fmt yuv420p -i origin.yuv -c:v hevc -f hevc -x265-params keyint=1:crf=18 out.h265, as shown in the image below: enter image description here

Here is my code:

void H265ImageCodec::InitCPUEncoder() {
  avcodec_register_all();
  AVCodec* encoder = avcodec_find_encoder(AV_CODEC_ID_H265);
  CHECK(encoder) << "Can not find encoder with h265.";
  // context
  encode_context_ = avcodec_alloc_context3(encoder);
  CHECK(encode_context_) << "Could not allocate video codec context.";
  encode_context_->codec_id = AV_CODEC_ID_H265;
  encode_context_->profile = FF_PROFILE_HEVC_MAIN;
  encode_context_->codec_type = AVMEDIA_TYPE_VIDEO;
  encode_context_->width = width_; // it's 480
  encode_context_->height = height_; // it's 256
  encode_context_->bit_rate = 384 * 1024;
  encode_context_->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV420P;
  encode_context_->time_base = (AVRational){1, 25};
  encode_context_->framerate = (AVRational){25, 1};
  AVDictionary* options = NULL;
  av_dict_set(&options, "preset", "ultrafast", 0);
  av_dict_set(&options, "tune", "zero-latency", 0);
  av_opt_set(encode_context_->priv_data, "x265-params", "keyint=1:crf=18",
             0);  // crf: Quality-controlled variable bitrate
  avcodec_open2(encode_context_, encoder, &options);

  encode_frame_ = av_frame_alloc();
  encode_frame_->format = encode_context_->pix_fmt;
  encode_frame_->width = encode_context_->width;
  encode_frame_->height = encode_context_->height;
  av_frame_get_buffer(encode_frame_, 0);
  // packet init
  encode_packet_ = av_packet_alloc();
}

std::string H265ImageCodec::EncodeImage(std::string_view raw_image) {
  av_packet_unref(encode_packet_);
  av_frame_make_writable(encode_frame_);
  const int64 y_size = width_ * height_;
  int64 offset = 0;
  memcpy(encode_frame_->data[0], raw_image.data() + offset, y_size);
  offset += y_size;
  memcpy(encode_frame_->data[1], raw_image.data() + offset, y_size / 4);
  offset += y_size / 4;
  memcpy(encode_frame_->data[2], raw_image.data() + offset, y_size / 4);
  avcodec_send_frame(encode_context_, encode_frame_);
  int ret = avcodec_receive_packet(encode_context_, encode_packet_);
  CHECK_EQ(ret, 0) << "receive encode packet ret: " << ret;
  std::string h265_frame(reinterpret_cast<char*>(encode_packet_->data),
                         encode_packet_->size);
  return h265_frame;
}

Any idea what might cause this?


Solution

  • As noted in the comments, the issue is that the rows of the U and V buffers in encode_frame_ are not contiguous in memory.

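    The plane buffers are allocated by av_frame_get_buffer(encode_frame_, 0) (av_frame_alloc() only allocates the AVFrame structure itself). That allocation may pad each row for alignment, so the step in bytes between consecutive rows (the linesize) can be larger than the row width: here each U and V row holds 240 bytes of data but occupies 256 bytes.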

    Illustration of the destination U channel in memory (x marks the padding bytes at the end of each row):

             <----------- 256 bytes ----------->
             <------- 240 elements ------->
        ^    uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
        |    uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
    128 rows uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
        |    uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
        V    uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
    

    To verify this, we can print the linesize values:

    //Print the step in bytes between two rows of each plane of the frame.
    //The trailing comments show the values reported for the 480x256 frame.
    printf("encode_frame_->linesize[0] = %d\n", encode_frame_->linesize[0]);  //Y plane: 480
    printf("encode_frame_->linesize[1] = %d\n", encode_frame_->linesize[1]);  //U plane: 256 (not 240)
    printf("encode_frame_->linesize[2] = %d\n", encode_frame_->linesize[2]);  //V plane: 256 (not 240)
    

    Inspired by cudaMemcpy2D, we may implement the function memcpy2D:

    //Copies a 2D block of bytes, row by row, from src to dst.
    //  dst    - first byte of the destination block
    //  dpitch - step in bytes between the starts of two destination rows
    //  src    - first byte of the source block
    //  spitch - step in bytes between the starts of two source rows
    //  width  - number of bytes copied per row
    //  height - number of rows copied
    //The parameter order mirrors cudaMemcpy2D.
    static void memcpy2D(void*         dst,
                         size_t        dpitch,
                         const void*   src,
                         size_t        spitch,
                         size_t        width,
                         size_t        height)
    {
        const unsigned char* const src_bytes = static_cast<const unsigned char*>(src);
        unsigned char* const dst_bytes = static_cast<unsigned char*>(dst);

        //Byte offsets of the current row from the start of each block.
        size_t src_offset = 0;
        size_t dst_offset = 0;

        for (size_t rows_left = height; rows_left > 0; --rows_left)
        {
            //Copy one row of width bytes, then step both offsets to the next row.
            memcpy(dst_bytes + dst_offset, src_bytes + src_offset, width);
            src_offset += spitch;
            dst_offset += dpitch;
        }
    }
    

    Use memcpy2D instead of memcpy to copy the data into a destination frame whose rows may not be contiguous in memory:

    //Copy Y channel:
    memcpy2D(encode_frame_->data[0],      //void* dst,
             encode_frame_->linesize[0],  //size_t        dpitch,
             raw_image.data() + offset,   //const void* src,
             width_,                      //size_t        spitch,
             width_,                      //size_t        width,
             height_);                    //size_t        height)
    
    offset += y_size;
    
    //Copy U channel:
    memcpy2D(encode_frame_->data[1],      //void* dst,
             encode_frame_->linesize[1],  //size_t        dpitch,
             raw_image.data() + offset,   //const void* src,
             width_/2,                    //size_t        spitch,
             width_/2,                    //size_t        width,
             height_/2);                  //size_t        height)
    
    offset += y_size / 4;
    
    //Copy V channel:
    memcpy2D(encode_frame_->data[2],      //void* dst,
             encode_frame_->linesize[2],  //size_t        dpitch,
             raw_image.data() + offset,   //const void* src,
             width_/2,                    //size_t        spitch,
             width_/2,                    //size_t        width,
             height_/2);                  //size_t        height)