I use FFmpeg to encode raw YUV data as H.265 (HEVC), but the encoded image is always corrupted, as shown below:
However, the following command encodes the same data correctly:
ffmpeg -f rawvideo -s 480x256 -pix_fmt yuv420p -i origin.yuv -c:v hevc -f hevc -x265-params keyint=1:crf=18 out.h265
The resulting image is shown below:
Here is my code:
void H265ImageCodec::InitCPUEncoder() {
avcodec_register_all();
AVCodec* encoder = avcodec_find_encoder(AV_CODEC_ID_H265);
CHECK(encoder) << "Can not find encoder with h265.";
// context
encode_context_ = avcodec_alloc_context3(encoder);
CHECK(encode_context_) << "Could not allocate video codec context.";
encode_context_->codec_id = AV_CODEC_ID_H265;
encode_context_->profile = FF_PROFILE_HEVC_MAIN;
encode_context_->codec_type = AVMEDIA_TYPE_VIDEO;
encode_context_->width = width_; // it's 480
encode_context_->height = height_; // it's 256
encode_context_->bit_rate = 384 * 1024;
encode_context_->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV420P;
encode_context_->time_base = (AVRational){1, 25};
encode_context_->framerate = (AVRational){25, 1};
AVDictionary* options = NULL;
av_dict_set(&options, "preset", "ultrafast", 0);
av_dict_set(&options, "tune", "zero-latency", 0);
av_opt_set(encode_context_->priv_data, "x265-params", "keyint=1:crf=18",
0); // crf: Quality-controlled variable bitrate
avcodec_open2(encode_context_, encoder, &options);
encode_frame_ = av_frame_alloc();
encode_frame_->format = encode_context_->pix_fmt;
encode_frame_->width = encode_context_->width;
encode_frame_->height = encode_context_->height;
av_frame_get_buffer(encode_frame_, 0);
// packet init
encode_packet_ = av_packet_alloc();
}
std::string H265ImageCodec::EncodeImage(std::string_view raw_image) {
av_packet_unref(encode_packet_);
av_frame_make_writable(encode_frame_);
const int64 y_size = width_ * height_;
int64 offset = 0;
memcpy(encode_frame_->data[0], raw_image.data() + offset, y_size);
offset += y_size;
memcpy(encode_frame_->data[1], raw_image.data() + offset, y_size / 4);
offset += y_size / 4;
memcpy(encode_frame_->data[2], raw_image.data() + offset, y_size / 4);
avcodec_send_frame(encode_context_, encode_frame_);
int ret = avcodec_receive_packet(encode_context_, encode_packet_);
CHECK_EQ(ret, 0) << "receive encode packet ret: " << ret;
std::string h265_frame(reinterpret_cast<char*>(encode_packet_->data),
encode_packet_->size);
return h265_frame;
}
Any idea what might cause this?
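As commented, the issue is that the rows of the U and V buffers in encode_frame_ are not contiguous in memory.
After the frame is created with encode_frame_ = av_frame_alloc() and its buffers are allocated with av_frame_get_buffer(encode_frame_, 0), the row strides are as follows: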
encode_frame_->linesize[0] = 480
encode_frame_->linesize[1] = 256 (not equal to 480/2)
encode_frame_->linesize[2] = 256 (not equal to 480/2)
Illustration of the destination U channel in memory:
<----------- 256 bytes ----------->
<------- 240 elements ------->
^ uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
| uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
128 rows uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
| uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
V uuuuuuuuuuuuuuuuuuuuuuuuuuuuuu xxxx
To check this, we can print the linesize values:
printf("encode_frame_->linesize[0] = %d\n", encode_frame_->linesize[0]); //480
printf("encode_frame_->linesize[1] = %d\n", encode_frame_->linesize[1]); //256 (not 240)
printf("encode_frame_->linesize[2] = %d\n", encode_frame_->linesize[2]); //256 (not 240)
Inspired by cudaMemcpy2D, we can implement a memcpy2D function:
// Copies a rectangle of bytes from src to dst, one row at a time.
//
// The signature mirrors cudaMemcpy2D:
//   dst    - start of the destination buffer
//   dpitch - distance in bytes between the starts of two destination rows
//   src    - start of the source buffer
//   spitch - distance in bytes between the starts of two source rows
//   width  - number of bytes copied per row
//   height - number of rows copied
static void memcpy2D(void* dst,
                     size_t dpitch,
                     const void* src,
                     size_t spitch,
                     size_t width,
                     size_t height)
{
    const unsigned char* const src_bytes = static_cast<const unsigned char*>(src);
    unsigned char* const dst_bytes = static_cast<unsigned char*>(dst);

    for (size_t row = 0; row != height; ++row)
    {
        // Each row starts a whole number of pitches from its buffer's start;
        // only the first `width` bytes of the row are transferred.
        memcpy(dst_bytes + row * dpitch, src_bytes + row * spitch, width);
    }
}
Use memcpy2D instead of memcpy to copy data into a destination frame whose rows may not be contiguous in memory:
//Copy Y channel:
memcpy2D(encode_frame_->data[0], //void* dst,
encode_frame_->linesize[0], //size_t dpitch,
raw_image.data() + offset, //const void* src,
width_, //size_t spitch,
width_, //size_t width,
height_); //size_t height)
offset += y_size;
//Copy U channel:
memcpy2D(encode_frame_->data[1], //void* dst,
encode_frame_->linesize[1], //size_t dpitch,
raw_image.data() + offset, //const void* src,
width_/2, //size_t spitch,
width_/2, //size_t width,
height_/2); //size_t height)
offset += y_size / 4;
//Copy V channel:
memcpy2D(encode_frame_->data[2], //void* dst,
encode_frame_->linesize[2], //size_t dpitch,
raw_image.data() + offset, //const void* src,
width_/2, //size_t spitch,
width_/2, //size_t width,
height_/2); //size_t height)