I am getting "bad src image ptrs" errors when trying to convert my frames to RGB with sws_scale after decoding them from an H264 file, and I cannot figure out what is going wrong.
I checked what is causing the error and found the check_image_pointers function in swscale.c. It validates that the planes and line sizes required by the pixel format (as described by av_pix_fmt_desc_get) are present in the given data, which does not seem to be the case with my data.
The written PGM files look OK to me, and playing back the file works as well.
I printed the corresponding data of my frame. The problem seems to be that planes 1 and 2 have line sizes of 0, although all three planes seem to have data. The line size of plane 0 is three times the image width, which also confuses me.
Here is my output:
Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0
Here is the whole code of my application; the issue occurs in the decode method:
#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>
// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>
extern "C"
{
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}
#define INBUF_SIZE 4096
class H264Decoder
{
public:
H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
{
// Open input file
if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
{
throw std::runtime_error("Could not open input file");
}
if (avformat_find_stream_info(formatContext, nullptr) < 0)
{
throw std::runtime_error("Could not find stream information");
}
// Find H.264 video stream
for (unsigned i = 0; i < formatContext->nb_streams; i++)
{
if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
{
videoStreamIndex = i;
std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
break;
}
}
if (videoStreamIndex == -1)
{
throw std::runtime_error("H.264 video stream not found");
}
// Initialize codec and codec context
const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
if (!codec)
{
throw std::runtime_error("Codec not found");
}
parser = av_parser_init(codec->id);
if (!parser)
{
throw std::runtime_error("parser not found");
}
codecContext = avcodec_alloc_context3(codec);
if (!codecContext)
{
throw std::runtime_error("Could not allocate codec context");
}
if (avcodec_open2(codecContext, codec, nullptr) < 0)
{
throw std::runtime_error("Could not open codec");
}
// Initialize frame
frame = av_frame_alloc();
frame->format = AV_PIX_FMT_YUV420P;
if (!frame)
{
throw std::runtime_error("Could not allocate frame");
}
inputPacket = av_packet_alloc();
if (!inputPacket)
{
throw std::runtime_error("Could not allocate packet");
}
inputFilename_ = inputFilename;
outputFilenamePrefix_ = outputFilenamePrefix;
}
void decode()
{
char buf[1024];
int ret;
ret = avcodec_send_packet(codecContext, inputPacket);
if (ret < 0)
{
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0)
{
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0)
{
fprintf(stderr, "Error during decoding\n");
exit(1);
}
/* the picture is allocated by the decoder. no need to
free it */
snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);
std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;
SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(codecContext->width,
codecContext->height,
codecContext->pix_fmt,
codecContext->width,
codecContext->height,
AV_PIX_FMT_RGB24,
SWS_BICUBIC,
NULL,
NULL,
NULL);
AVFrame *frame2 = av_frame_alloc();
int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
std::cout << "Required:" << std::endl;
for (int i = 0; i < 4; i++)
{
int plane = desc->comp[i].plane;
std::cout << "plane " << i << " : " << plane << std::endl;
}
std::cout << "Present:" << std::endl;
for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
{
std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
}
sws_scale(sws_ctx, frame->data,
frame->linesize, 0, codecContext->height,
frame2->data, frame2->linesize);
// cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
// cv::imshow("Image", img);
pgm_save(frame->data[0], frame->linesize[0],
frame->width, frame->height, buf);
}
}
~H264Decoder()
{
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
av_frame_free(&frame);
av_packet_free(&inputPacket);
}
void readAndDecode()
{
FILE *f;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t *data;
size_t data_size;
int ret;
int eof;
f = fopen(inputFilename_.c_str(), "rb");
auto start = std::chrono::high_resolution_clock::now();
do
{
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (ferror(f))
break;
eof = !data_size;
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0 || eof)
{
ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0)
{
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (inputPacket->size)
{
decode();
}
else if (eof)
{
break;
}
}
} while (!eof);
auto diff = std::chrono::high_resolution_clock::now() - start;
std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
}
private:
AVFormatContext *formatContext = nullptr;
AVCodecContext *codecContext = nullptr;
AVCodecParserContext *parser;
AVFrame *frame = nullptr;
AVFrame *frameRgb = nullptr;
AVPacket *inputPacket = nullptr;
int videoStreamIndex = -1;
std::string inputFilename_;
std::string outputFilenamePrefix_;
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
{
FILE *f = fopen(filename, "wb");
if (!f)
{
std::cout << "Error opening file for saving PGM" << std::endl;
exit(1);
}
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (int i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
};
/**
 * Entry point: decodes the H.264 file named by the first command-line
 * argument and writes the resulting PGM files below C:\tmp.
 *
 * @return 0 on success, 1 if the argument is missing or decoding setup fails
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cout << "Please provide input file name as parameter" << std::endl;
        // Stop here: argv[1] is a null pointer when no argument was given,
        // and constructing a std::string from it is undefined behavior.
        return 1;
    }
    const std::string inputFilename = argv[1];
    const std::string outputFilenamePrefix = "C:\\tmp\\output-frame";
    try
    {
        H264Decoder decoder(inputFilename, outputFilenamePrefix);
        decoder.readAndDecode();
    }
    catch (const std::exception &e)
    {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
The issue was in how I tried to initialize frame2: I overwrote the linesize of the YUV frame instead of that of the RGB frame: av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
I removed the complete initialization of frame2 and used av_image_alloc to allocate the buffers for the RGB frame.
Here is my current working code in case anyone wants to use it as a reference. Conversion changed from RGB to BGR to show it with OpenCV.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#define INBUF_SIZE 4096
class H264Decoder {
public:
H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
uint16_t outputHeight, bool show, bool save)
: doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
outputHeight(outputHeight), outputWidth(outputWidth) {
// Open input file
if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
throw std::runtime_error("Could not open input file");
}
if (avformat_find_stream_info(formatContext, nullptr) < 0) {
throw std::runtime_error("Could not find stream information");
}
// Find H.264 video stream
for (unsigned i = 0; i < formatContext->nb_streams; i++) {
if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) {
throw std::runtime_error("H.264 video stream not found");
}
// Initialize codec and codec context
codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
if (!codec) {
throw std::runtime_error("Codec not found");
}
parser = av_parser_init(codec->id);
if (!parser) {
throw std::runtime_error("parser not found");
}
codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
throw std::runtime_error("Could not allocate codec context");
}
if (avcodec_open2(codecContext, codec, nullptr) < 0) {
throw std::runtime_error("Could not open codec");
}
// Initialize frame
frame = av_frame_alloc();
if (!frame) {
throw std::runtime_error("Could not allocate frame");
}
frameRgb = av_frame_alloc();
if (!frameRgb) {
throw std::runtime_error("Could not allocate frame");
}
av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);
inputPacket = av_packet_alloc();
if (!inputPacket) {
throw std::runtime_error("Could not allocate packet");
}
}
void decode() {
char buf[1024];
int ret;
ret = avcodec_send_packet(codecContext, inputPacket);
if (ret < 0) {
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_frame(codecContext, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);
SwsContext* sws_ctx = NULL;
sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
if (doSave) {
pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
}
sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
frameRgb->linesize);
if (doShow) {
cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
cv::imshow("Image", img);
cv::waitKey(1);
}
}
}
~H264Decoder() {
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
av_frame_free(&frame);
av_frame_free(&frameRgb);
av_packet_free(&inputPacket);
av_freep(&frameRgb->data[0]);
}
void readAndDecode() {
FILE* f;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t* data;
size_t data_size;
int ret;
int eof;
f = fopen(inputFilename_.c_str(), "rb");
if (!f) {
std::cout << "Error opening file" << std::endl;;
exit(1);
}
memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
auto start = std::chrono::high_resolution_clock::now();
do {
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (ferror(f))
break;
eof = !data_size;
/* use the parser to split the data into frames */
data = inbuf;
while (data_size > 0 || eof) {
ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
fprintf(stderr, "Error while parsing\n");
exit(1);
}
data += ret;
data_size -= ret;
if (inputPacket->size) {
decode();
} else if (eof) {
break;
}
}
} while (!eof);
auto diff = std::chrono::high_resolution_clock::now() - start;
std::cout << "Decoded " << codecContext->frame_num << " frames in "
<< std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
<< std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
<< " ms/frame " << std::endl;
}
private:
bool doShow{false};
bool doSave{true};
const AVCodec* codec;
AVFormatContext* formatContext = nullptr;
AVCodecContext* codecContext = nullptr;
AVCodecParserContext* parser;
AVFrame* frame = nullptr;
AVFrame* frameRgb = nullptr;
AVPacket* inputPacket = nullptr;
int videoStreamIndex = -1;
std::string inputFilename_;
std::string outputFilenamePrefix_;
uint16_t outputHeight = 1280;
uint16_t outputWidth = 1632;
static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
FILE* f = fopen(filename, "wb");
if (!f) {
std::cout << "Error opening file for saving PGM" << std::endl;
exit(1);
}
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (int i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
};
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "Please provide input file name as parameter" << std::endl;
exit(1);
}
std::string inputFilename = argv[1];
std::string outputFilenamePrefix = "C:\\tmp\\pics\\output-frame";
try {
H264Decoder decoder(inputFilename, outputFilenamePrefix, 1632, 1280, true, false);
decoder.readAndDecode();
} catch (const std::exception& e) {
std::cout << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}