ffmpeg avx2 hevc avcodec

FFmpeg avcodec library crashes in ff_hevc_sao_edge_filter_32_8_avx2()


I'm using avcodec to decode an HEVC clip on Linux. The avcodec library is built from the ffmpeg-4.3.1 source package with the following command:

configure --prefix=/mnt/projects/ffmpeg-4.3.1/build --disable-static --enable-shared --disable-stripping && make

My application initializes the codec and codec context as follows:

AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H265); 
AVCodecContext* avContext = avcodec_alloc_context3(codec);
avcodec_open2(_private->_avContext, codec, NULL);

My dev env:

oap-dev@oap-dev:ffmpeg-4.3.1$ lsb_release -d
Description:        Ubuntu 18.04.5 LTS
oap-dev@oap-dev:ffmpeg-4.3.1$ uname -a
Linux oap-dev 4.15.0-163-generic #171-Ubuntu SMP Fri Nov 5 11:55:11 UTC 2021 x86_64 x86_64 
x86_64 GNU/Linux

The ffplay built from the same source code plays the clip without error, but my application hits the segmentation fault below:

#0  ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
337        HEVC_SAO_EDGE_FILTER 32, 1, a
[Current thread is 1 (Thread 0x7f26e2814700 (LWP 99189))]
(gdb) bt
#0  ff_hevc_sao_edge_filter_32_8_avx2 () at libavcodec/x86/hevc_sao.asm:337
#1  0x00007f26f9425853 in sao_filter_CTB (s=s@entry=0x7f26dc0069c0, x=x@entry=0, y=y@entry=0) at libavcodec/hevc_filter.c:436
#2  0x00007f26f9427e04 in ff_hevc_hls_filter (s=0x7f26dc0069c0, x=x@entry=64, y=y@entry=64, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:861
#3  0x00007f26f9428fa5 in ff_hevc_hls_filters (s=s@entry=0x7f26dc0069c0, x_ctb=x_ctb@entry=128, y_ctb=y_ctb@entry=128, ctb_size=ctb_size@entry=64) at libavcodec/hevc_filter.c:883
#4  0x00007f26f94405ae in hls_decode_entry (avctxt=<optimized out>, isFilterThread=<optimized out>) at libavcodec/hevcdec.c:2462
#5  0x00007f26f970dbf5 in avcodec_default_execute (c=0x7f26dc006180, func=0x7f26f9440300 <hls_decode_entry>, arg=<optimized out>, ret=<optimized out>, count=<optimized out>, size=4) at libavcodec/utils.c:446
#6  0x00007f26f9444c18 in hls_slice_data (s=0x7f26dc0069c0) at libavcodec/hevcdec.c:2480
#7  decode_nal_unit (nal=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3015
#8  decode_nal_units (length=<optimized out>, buf=<optimized out>, s=0x7f26dc0069c0) at libavcodec/hevcdec.c:3088
#9  hevc_decode_frame (avctx=<optimized out>, data=<optimized out>, got_output=<optimized out>, avpkt=<optimized out>) at libavcodec/hevcdec.c:3226
#10 0x00007f26f96148a3 in frame_worker_thread (arg=0x7f26dc005dc0) at libavcodec/pthread_frame.c:201
#11 0x00007f26fbbd26db in start_thread (arg=0x7f26e2814700) at pthread_create.c:463
#12 0x00007f26f7d5971f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

More information: if I add the option "--disable-avx2" and rebuild the avcodec library, my application plays the clip successfully.

Why is there a segmentation fault with AVX2 enabled? Any help is appreciated.

==========================================
Update per Peter Cordes' response:

(gdb) bt
#0  0x00007f174d91ce3e in ff_hevc_add_residual_32_8_avx2 () from /mnt/projects/ffmpeg-4.3.1/build/lib/libavcodec.so.58
#1  0x00007f174d46c401 in hls_transform_tree (s=0x7f17300069c0, x0=0, y0=960, xBase=2880, yBase=0, cb_xBase=0, cb_yBase=240, log2_cb_size=914341248, log2_trafo_size=1340287338, trafo_depth=51, blk_idx=3, base_cbf_cb=0x5f15, base_cbf_cr=0x0) at libavcodec/hevcdec.c:1122
#2  0x00007f174d46f993 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2246
#3  0x00007f174d46f578 in hls_coding_quadtree (s=0x7f17300069c0, x0=805345792, y0=960, log2_cb_size=2880, cb_depth=8) at libavcodec/hevcdec.c:2312
#4  0x00007f174d47157f in hls_decode_entry (avctxt=0x7f17481a0010, isFilterThread=0x7f1730009a00) at libavcodec/hevcdec.c:2453
#5  0x00007f174d73ebf5 in avcodec_default_execute (c=0x7f17481a0010, func=0x7f1730009a00, arg=0x3c0, ret=0xb40, count=8, size=0) at libavcodec/utils.c:446
#6  0x00007f174d475c18 in hevc_decode_frame (avctx=0x7f17481a0010, data=0x7f1730009a00, got_output=0x3c0, avpkt=0x7f1730009880) at libavcodec/hevcdec.c:2480
#7  0x00007f174d6458a3 in frame_worker_thread (arg=0x7f1730005dc0) at libavcodec/pthread_frame.c:201
#8  0x00007f174fc036db in start_thread (arg=0x7f17367fc700) at pthread_create.c:463
#9  0x00007f174bd8a71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disas
Dump of assembler code for function ff_hevc_add_residual_32_8_avx2:
   0x00007f174d91ce30 <+0>: vpxor  %ymm0,%ymm0,%ymm0
   0x00007f174d91ce34 <+4>: lea    (%rdx,%rdx,2),%rcx
   0x00007f174d91ce38 <+8>: mov    $0x8,%r8d
=> 0x00007f174d91ce3e <+14>:    vmovdqa (%rdi),%ymm1
   0x00007f174d91ce42 <+18>:    vmovdqa %ymm1,%ymm2
   0x00007f174d91ce46 <+22>:    vpunpcklbw %ymm0,%ymm1,%ymm1
   0x00007f174d91ce4a <+26>:    vpunpckhbw %ymm0,%ymm2,%ymm2
   0x00007f174d91ce4e <+30>:    vmovdqa (%rsi),%xmm5
   0x00007f174d91ce52 <+34>:    vmovdqa 0x10(%rsi),%xmm6
   0x00007f174d91ce57 <+39>:    vinserti128 $0x1,0x20(%rsi),%ymm5,%ymm5
   0x00007f174d91ce5e <+46>:    vinserti128 $0x1,0x30(%rsi),%ymm6,%ymm6
   0x00007f174d91ce65 <+53>:    vpaddsw %ymm5,%ymm1,%ymm1
   0x00007f174d91ce69 <+57>:    vpaddsw %ymm6,%ymm2,%ymm2
   0x00007f174d91ce6d <+61>:    vmovdqa (%rdi,%rdx,1),%ymm3
   0x00007f174d91ce72 <+66>:    vmovdqa %ymm3,%ymm4
   0x00007f174d91ce76 <+70>:    vpunpcklbw %ymm0,%ymm3,%ymm3
   0x00007f174d91ce7a <+74>:    vpunpckhbw %ymm0,%ymm4,%ymm4
   0x00007f174d91ce7e <+78>:    vmovdqa 0x40(%rsi),%xmm5
   0x00007f174d91ce83 <+83>:    vmovdqa 0x50(%rsi),%xmm6
   0x00007f174d91ce88 <+88>:    vinserti128 $0x1,0x60(%rsi),%ymm5,%ymm5
   0x00007f174d91ce8f <+95>:    vinserti128 $0x1,0x70(%rsi),%ymm6,%ymm6
   0x00007f174d91ce96 <+102>:   vpaddsw %ymm5,%ymm3,%ymm3
   0x00007f174d91ce9a <+106>:   vpaddsw %ymm6,%ymm4,%ymm4
   0x00007f174d91ce9e <+110>:   vpackuswb %ymm2,%ymm1,%ymm1
   0x00007f174d91cea2 <+114>:   vpackuswb %ymm4,%ymm3,%ymm3
   0x00007f174d91cea6 <+118>:   vmovdqa %ymm1,(%rdi)
   0x00007f174d91ceaa <+122>:   vmovdqa %ymm3,(%rdi,%rdx,1)
   0x00007f174d91ceaf <+127>:   vmovdqa (%rdi,%rdx,2),%ymm1
   0x00007f174d91ceb4 <+132>:   vmovdqa %ymm1,%ymm2
   0x00007f174d91ceb8 <+136>:   vpunpcklbw %ymm0,%ymm1,%ymm1
   0x00007f174d91cebc <+140>:   vpunpckhbw %ymm0,%ymm2,%ymm2
   0x00007f174d91cec0 <+144>:   vmovdqa 0x80(%rsi),%xmm5
   0x00007f174d91cec8 <+152>:   vmovdqa 0x90(%rsi),%xmm6
   0x00007f174d91ced0 <+160>:   vinserti128 $0x1,0xa0(%rsi),%ymm5,%ymm5
   0x00007f174d91ceda <+170>:   vinserti128 $0x1,0xb0(%rsi),%ymm6,%ymm6
   0x00007f174d91cee4 <+180>:   vpaddsw %ymm5,%ymm1,%ymm1
   0x00007f174d91cee8 <+184>:   vpaddsw %ymm6,%ymm2,%ymm2
   0x00007f174d91ceec <+188>:   vmovdqa (%rdi,%rcx,1),%ymm3
   0x00007f174d91cef1 <+193>:   vmovdqa %ymm3,%ymm4
   0x00007f174d91cef5 <+197>:   vpunpcklbw %ymm0,%ymm3,%ymm3
   0x00007f174d91cef9 <+201>:   vpunpckhbw %ymm0,%ymm4,%ymm4
---Type <return> to continue, or q <return> to quit---
   0x00007f174d91cefd <+205>:   vmovdqa 0xc0(%rsi),%xmm5
   0x00007f174d91cf05 <+213>:   vmovdqa 0xd0(%rsi),%xmm6
   0x00007f174d91cf0d <+221>:   vinserti128 $0x1,0xe0(%rsi),%ymm5,%ymm5
   0x00007f174d91cf17 <+231>:   vinserti128 $0x1,0xf0(%rsi),%ymm6,%ymm6
   0x00007f174d91cf21 <+241>:   vpaddsw %ymm5,%ymm3,%ymm3
   0x00007f174d91cf25 <+245>:   vpaddsw %ymm6,%ymm4,%ymm4
   0x00007f174d91cf29 <+249>:   vpackuswb %ymm2,%ymm1,%ymm1
   0x00007f174d91cf2d <+253>:   vpackuswb %ymm4,%ymm3,%ymm3
   0x00007f174d91cf31 <+257>:   vmovdqa %ymm1,(%rdi,%rdx,2)
   0x00007f174d91cf36 <+262>:   vmovdqa %ymm3,(%rdi,%rcx,1)
   0x00007f174d91cf3b <+267>:   add    $0x100,%rsi
   0x00007f174d91cf42 <+274>:   lea    (%rdi,%rdx,4),%rdi
   0x00007f174d91cf46 <+278>:   dec    %r8d
   0x00007f174d91cf49 <+281>:   jg     0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
   0x00007f174d91cf4f <+287>:   vzeroupper 
   0x00007f174d91cf52 <+290>:   retq   
End of assembler dump.

(gdb) info reg
rax            0x7f174d91ce30   139738062376496
rbx            0x7f17300069c0   139737566308800
rcx            0xb40    2880
rdx            0x3c0    960
rsi            0x7f1730009a00   139737566321152
rdi            0x7f17481a0010   139737970638864
rbp            0x7f1730009880   0x7f1730009880
rsp            0x7f17367fb9b8   0x7f17367fb9b8
r8             0x8  8
r9             0x0  0
r10            0xa  10
r11            0xa  10
r12            0x0  0
r13            0x2  2
r14            0x0  0
r15            0x0  0
rip            0x7f174d91ce3e   0x7f174d91ce3e <ff_hevc_add_residual_32_8_avx2+14>
eflags         0x10206  [ PF IF RF ]
cs             0x33 51
ss             0x2b 43
ds             0x0  0
es             0x0  0
fs             0x0  0
gs             0x0  0

Solution

  • I decode in a loop like this:

    while (!exit) {
        avcodec_send_packet(AVCodecConext * ctx, AVPacket* pkt);
        // check return value ...
    
        avcodec_receive_frame(AVCodecConext * ctx, AVFrame* frame);
        //check return value ...
    }
    

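    Before decoding, I set get_buffer2, a public member of AVCodecContext. It is a callback function that is called during avcodec_receive_frame(). The buffer it provides must be 32-byte aligned: the AVX2 code accesses it with vmovdqa on a ymm register, which faults on an address that is not 32-byte aligned (rdi in the register dump above, 0x7f17481a0010, is only 16-byte aligned). I assign the function below to get_buffer2. (The posix_memalign call is shown in shorthand; the real function takes the output pointer as its first argument and returns an error code.)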

    int get_frame_buffer(struct AVCodecContext *c, AVFrame *frame, int flags) {
        
        frame->data[0] = posix_memalign(32, frmaeSize); // previously, I used operator new,which will trigger coredump when decoding HEVC
        
        // ...       
    }