I'm using avcodec to decode an HEVC clip on Linux. The avcodec library is built from the ffmpeg-4.3.1 source package with the following command:
configure --prefix=/mnt/projects/ffmpeg-4.3.1/build --disable-static --enable-shared --disable-stripping && make
My application initializes the codec and the codec context as follows:
AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H265);
AVCodecContext* avContext = avcodec_alloc_context3(codec);
avcodec_open2(_private->_avContext, codec, NULL);
My dev env:
oap-dev@oap-dev:ffmpeg-4.3.1$ lsb_release -d
Description: Ubuntu 18.04.5 LTS
oap-dev@oap-dev:ffmpeg-4.3.1$ uname -a
Linux oap-dev 4.15.0-163-generic #171-Ubuntu SMP Fri Nov 5 11:55:11 UTC 2021 x86_64 x86_64
x86_64 GNU/Linux
The ffplay built from the same source code can play the clip without error, but my application runs into the segmentation fault below:
#0 ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
337 HEVC_SAO_EDGE_FILTER 32, 1, a
[Current thread is 1 (Thread 0x7f26e2814700 (LWP 99189))]
(gdb) bt
#0 ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
#1 0x00007f26f9425853 in sao_filter_CTB (s=s@entry=0x7f26dc0069c0, x=x@entry=0, y=y@entry=0) at libavcodec/hevc_filter.c:436
#2 0x00007f26f9427e04 in ff_hevc_hls_filter (s=0x7f26dc0069c0, x=x@entry=64, y=y@entry=64, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:861
#3 0x00007f26f9428fa5 in ff_hevc_hls_filters (s=s@entry=0x7f26dc0069c0, x_ctb=x_ctb@entry=128, y_ctb=y_ctb@entry=128, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:883
#4 0x00007f26f94405ae in hls_decode_entry (avctxt=<optimized out>, isFilterThread=<optimized out>) at libavcodec/hevcdec.c:2462
#5 0x00007f26f970dbf5 in avcodec_default_execute (c=0x7f26dc006180, func=0x7f26f9440300 <hls_decode_entry>, arg=<optimized out>, ret=<optimized out>, count=<optimized out>, size=4) at libavcodec/utils.c:446
#6 0x00007f26f9444c18 in hls_slice_data (s=0x7f26dc0069c0) at libavcodec/hevcdec.c:2480
#7 decode_nal_unit (nal=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3015
#8 decode_nal_units (length=<optimized out>, buf=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3088
#9 hevc_decode_frame (avctx=<optimized out>, data=<optimized out>, got_output=<optimized out>, avpkt=<optimized out>) at libavcodec/hevcdec.c:3226
#10 0x00007f26f96148a3 in frame_worker_thread (arg=0x7f26dc005dc0) at libavcodec/pthread_frame.c:201
#11 0x00007f26fbbd26db in start_thread (arg=0x7f26e2814700) at pthread_create.c:463
#12 0x00007f26f7d5971f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
More information: if I add the option "--disable-avx2" and rebuild the avcodec library, my application can play the clip successfully.
Why is there a segmentation fault when AVX2 is enabled? Any help is appreciated.
==========================================
Update per Peter Cordes's response:
(gdb) bt
#0 0x00007f174d91ce3e in ff_hevc_add_residual_32_8_avx2 () from /mnt/projects/ffmpeg-4.3.1/build/lib/libavcodec.so.58
#1 0x00007f174d46c401 in hls_transform_tree (s=0x7f17300069c0, x0=0, y0=960, xBase=2880, yBase=0, cb_xBase=0, cb_yBase=240, log2_cb_size=914341248, log2_trafo_size=1340287338, trafo_depth=51, blk_idx=3, base_cbf_cb=0x5f15, base_cbf_cr=0x0) at libavcodec/hevcdec.c:1122
#2 0x00007f174d46f993 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2246
#3 0x00007f174d46f578 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2312
#4 0x00007f174d47157f in hls_decode_entry (avctxt=0x7f17481a0010, isFilterThread=0x7f1730009a00) at libavcodec/hevcdec.c:2453
#5 0x00007f174d73ebf5 in avcodec_default_execute (c=0x7f17481a0010, func=0x7f1730009a00, arg=0x3c0, ret=0xb40, count=8, size=0) at libavcodec/utils.c:446
#6 0x00007f174d475c18 in hevc_decode_frame (avctx=0x7f17481a0010, data=0x7f1730009a00, got_output=0x3c0, avpkt=0x7f1730009880) at libavcodec/hevcdec.c:2480
#7 0x00007f174d6458a3 in frame_worker_thread (arg=0x7f1730005dc0) at libavcodec/pthread_frame.c:201
#8 0x00007f174fc036db in start_thread (arg=0x7f17367fc700) at pthread_create.c:463
#9 0x00007f174bd8a71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disas
Dump of assembler code for function ff_hevc_add_residual_32_8_avx2:
0x00007f174d91ce30 <+0>: vpxor %ymm0,%ymm0,%ymm0
0x00007f174d91ce34 <+4>: lea (%rdx,%rdx,2),%rcx
0x00007f174d91ce38 <+8>: mov $0x8,%r8d
=> 0x00007f174d91ce3e <+14>: vmovdqa (%rdi),%ymm1
0x00007f174d91ce42 <+18>: vmovdqa %ymm1,%ymm2
0x00007f174d91ce46 <+22>: vpunpcklbw %ymm0,%ymm1,%ymm1
0x00007f174d91ce4a <+26>: vpunpckhbw %ymm0,%ymm2,%ymm2
0x00007f174d91ce4e <+30>: vmovdqa (%rsi),%xmm5
0x00007f174d91ce52 <+34>: vmovdqa 0x10(%rsi),%xmm6
0x00007f174d91ce57 <+39>: vinserti128 $0x1,0x20(%rsi),%ymm5,%ymm5
0x00007f174d91ce5e <+46>: vinserti128 $0x1,0x30(%rsi),%ymm6,%ymm6
0x00007f174d91ce65 <+53>: vpaddsw %ymm5,%ymm1,%ymm1
0x00007f174d91ce69 <+57>: vpaddsw %ymm6,%ymm2,%ymm2
0x00007f174d91ce6d <+61>: vmovdqa (%rdi,%rdx,1),%ymm3
0x00007f174d91ce72 <+66>: vmovdqa %ymm3,%ymm4
0x00007f174d91ce76 <+70>: vpunpcklbw %ymm0,%ymm3,%ymm3
0x00007f174d91ce7a <+74>: vpunpckhbw %ymm0,%ymm4,%ymm4
0x00007f174d91ce7e <+78>: vmovdqa 0x40(%rsi),%xmm5
0x00007f174d91ce83 <+83>: vmovdqa 0x50(%rsi),%xmm6
0x00007f174d91ce88 <+88>: vinserti128 $0x1,0x60(%rsi),%ymm5,%ymm5
0x00007f174d91ce8f <+95>: vinserti128 $0x1,0x70(%rsi),%ymm6,%ymm6
0x00007f174d91ce96 <+102>: vpaddsw %ymm5,%ymm3,%ymm3
0x00007f174d91ce9a <+106>: vpaddsw %ymm6,%ymm4,%ymm4
0x00007f174d91ce9e <+110>: vpackuswb %ymm2,%ymm1,%ymm1
0x00007f174d91cea2 <+114>: vpackuswb %ymm4,%ymm3,%ymm3
0x00007f174d91cea6 <+118>: vmovdqa %ymm1,(%rdi)
0x00007f174d91ceaa <+122>: vmovdqa %ymm3,(%rdi,%rdx,1)
0x00007f174d91ceaf <+127>: vmovdqa (%rdi,%rdx,2),%ymm1
0x00007f174d91ceb4 <+132>: vmovdqa %ymm1,%ymm2
0x00007f174d91ceb8 <+136>: vpunpcklbw %ymm0,%ymm1,%ymm1
0x00007f174d91cebc <+140>: vpunpckhbw %ymm0,%ymm2,%ymm2
0x00007f174d91cec0 <+144>: vmovdqa 0x80(%rsi),%xmm5
0x00007f174d91cec8 <+152>: vmovdqa 0x90(%rsi),%xmm6
0x00007f174d91ced0 <+160>: vinserti128 $0x1,0xa0(%rsi),%ymm5,%ymm5
0x00007f174d91ceda <+170>: vinserti128 $0x1,0xb0(%rsi),%ymm6,%ymm6
0x00007f174d91cee4 <+180>: vpaddsw %ymm5,%ymm1,%ymm1
0x00007f174d91cee8 <+184>: vpaddsw %ymm6,%ymm2,%ymm2
0x00007f174d91ceec <+188>: vmovdqa (%rdi,%rcx,1),%ymm3
0x00007f174d91cef1 <+193>: vmovdqa %ymm3,%ymm4
0x00007f174d91cef5 <+197>: vpunpcklbw %ymm0,%ymm3,%ymm3
0x00007f174d91cef9 <+201>: vpunpckhbw %ymm0,%ymm4,%ymm4
---Type <return> to continue, or q <return> to quit---
0x00007f174d91cefd <+205>: vmovdqa 0xc0(%rsi),%xmm5
0x00007f174d91cf05 <+213>: vmovdqa 0xd0(%rsi),%xmm6
0x00007f174d91cf0d <+221>: vinserti128 $0x1,0xe0(%rsi),%ymm5,%ymm5
0x00007f174d91cf17 <+231>: vinserti128 $0x1,0xf0(%rsi),%ymm6,%ymm6
0x00007f174d91cf21 <+241>: vpaddsw %ymm5,%ymm3,%ymm3
0x00007f174d91cf25 <+245>: vpaddsw %ymm6,%ymm4,%ymm4
0x00007f174d91cf29 <+249>: vpackuswb %ymm2,%ymm1,%ymm1
0x00007f174d91cf2d <+253>: vpackuswb %ymm4,%ymm3,%ymm3
0x00007f174d91cf31 <+257>: vmovdqa %ymm1,(%rdi,%rdx,2)
0x00007f174d91cf36 <+262>: vmovdqa %ymm3,(%rdi,%rcx,1)
0x00007f174d91cf3b <+267>: add $0x100,%rsi
0x00007f174d91cf42 <+274>: lea (%rdi,%rdx,4),%rdi
0x00007f174d91cf46 <+278>: dec %r8d
0x00007f174d91cf49 <+281>: jg 0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
0x00007f174d91cf4f <+287>: vzeroupper
0x00007f174d91cf52 <+290>: retq
End of assembler dump.
(gdb) info reg
rax 0x7f174d91ce30 139738062376496
rbx 0x7f17300069c0 139737566308800
rcx 0xb40 2880
rdx 0x3c0 960
rsi 0x7f1730009a00 139737566321152
rdi 0x7f17481a0010 139737970638864
rbp 0x7f1730009880 0x7f1730009880
rsp 0x7f17367fb9b8 0x7f17367fb9b8
r8 0x8 8
r9 0x0 0
r10 0xa 10
r11 0xa 10
r12 0x0 0
r13 0x2 2
r14 0x0 0
r15 0x0 0
rip 0x7f174d91ce3e 0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
eflags 0x10206 [ PF IF RF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
I do the decoding as follows:
while (!exit) {
avcodec_send_packet(AVCodecConext * ctx, AVPacket* pkt);
// check return value ...
avcodec_receive_frame(AVCodecConext * ctx, AVFrame* frame);
//check return value ...
}
Before decoding, I set get_buffer2, a public member of AVCodecContext. It is a callback function that will be called when calling avcodec_receive_frame(). I assign the following function to get_buffer2:
int get_frame_buffer(struct AVCodecContext *c, AVFrame *frame, int flags) {
frame->data[0] = posix_memalign(32, frmaeSize); // previously, I used operator new,which will trigger coredump when decoding HEVC
// ...
}