cgccinteger-promotion

What is the purpose of the instructions that gcc emits?


I have an example of C code with an issue (integer promotions from uint16_t to int, and then cast to uint32_t):

#include <stdio.h>
#include <stdint.h>

int main() {
    uint16_t x = 48000;
    uint32_t y = (x * x) / 2;

    printf("y = %u\n", y);

    return 0;
}

Program output is obviously incorrect, but my question is not about the issue with promotions per se. I was looking at what compiler outputs with and without optimizations. With -O3 both gcc and clang just pre-compute result value:

; gcc -O3:
0000000000001060 <main>:
    1060:       f3 0f 1e fa             endbr64 
    1064:       48 83 ec 08             sub    rsp,0x8
    1068:       ba 00 20 aa c4          mov    edx,0xc4aa2000         ; <- result
    106d:       bf 01 00 00 00          mov    edi,0x1
    1072:       31 c0                   xor    eax,eax
    1074:       48 8d 35 89 0f 00 00    lea    rsi,[rip+0xf89]        # 2004 <_IO_stdin_used+0x4>
    107b:       e8 d0 ff ff ff          call   1050 <__printf_chk@plt>
    1080:       31 c0                   xor    eax,eax
    1082:       48 83 c4 08             add    rsp,0x8
    1086:       c3                      ret    
    1087:       66 0f 1f 84 00 00 00    nop    WORD PTR [rax+rax*1+0x0]
    108e:       00 00 
; clang -O3:
0000000000001140 <main>:
    1140:       50                      push   rax
    1141:       48 8d 3d bc 0e 00 00    lea    rdi,[rip+0xebc]        # 2004 <_IO_stdin_used+0x4>
    1148:       be 00 20 aa c4          mov    esi,0xc4aa2000         ; <- result
    114d:       31 c0                   xor    eax,eax
    114f:       e8 dc fe ff ff          call   1030 <printf@plt>
    1154:       31 c0                   xor    eax,eax
    1156:       59                      pop    rcx
    1157:       c3                      ret    

But with -O0 clang gives this (nothing suspicious):

; clang -O0:
0000000000001140 <main>:
    1140:       55                      push   rbp
    1141:       48 89 e5                mov    rbp,rsp
    1144:       48 83 ec 10             sub    rsp,0x10
    1148:       c7 45 fc 00 00 00 00    mov    DWORD PTR [rbp-0x4],0x0
    114f:       66 c7 45 fa 80 bb       mov    WORD PTR [rbp-0x6],0xbb80
    1155:       0f b7 45 fa             movzx  eax,WORD PTR [rbp-0x6]
    1159:       0f b7 4d fa             movzx  ecx,WORD PTR [rbp-0x6]
    115d:       0f af c1                imul   eax,ecx                     ; x * x
    1160:       b9 02 00 00 00          mov    ecx,0x2
    1165:       99                      cdq    
    1166:       f7 f9                   idiv   ecx                         ; (x * x) / 2 
    1168:       89 45 f4                mov    DWORD PTR [rbp-0xc],eax
    116b:       8b 75 f4                mov    esi,DWORD PTR [rbp-0xc]
    116e:       48 8d 3d 8f 0e 00 00    lea    rdi,[rip+0xe8f]        # 2004 <_IO_stdin_used+0x4>
    1175:       b0 00                   mov    al,0x0
    1177:       e8 b4 fe ff ff          call   1030 <printf@plt>
    117c:       31 c0                   xor    eax,eax
    117e:       48 83 c4 10             add    rsp,0x10
    1182:       5d                      pop    rbp
    1183:       c3                      ret    

and gcc gives this:

; gcc -O0
0000000000001149 <main>:
    1149:       f3 0f 1e fa             endbr64 
    114d:       55                      push   rbp
    114e:       48 89 e5                mov    rbp,rsp
    1151:       48 83 ec 10             sub    rsp,0x10
    1155:       66 c7 45 fa 80 bb       mov    WORD PTR [rbp-0x6],0xbb80
    115b:       0f b7 55 fa             movzx  edx,WORD PTR [rbp-0x6]
    115f:       0f b7 45 fa             movzx  eax,WORD PTR [rbp-0x6]
    1163:       0f af c2                imul   eax,edx                       ; x * x
    1166:       89 c2                   mov    edx,eax
    1168:       c1 ea 1f                shr    edx,0x1f                      ; ??? why?
    116b:       01 d0                   add    eax,edx                       ; ??? why?
    116d:       d1 f8                   sar    eax,1
    116f:       89 45 fc                mov    DWORD PTR [rbp-0x4],eax
    1172:       8b 45 fc                mov    eax,DWORD PTR [rbp-0x4]
    1175:       89 c6                   mov    esi,eax
    1177:       48 8d 05 86 0e 00 00    lea    rax,[rip+0xe86]        # 2004 <_IO_stdin_used+0x4>
    117e:       48 89 c7                mov    rdi,rax
    1181:       b8 00 00 00 00          mov    eax,0x0
    1186:       e8 c5 fe ff ff          call   1050 <printf@plt>
    118b:       b8 00 00 00 00          mov    eax,0x0
    1190:       c9                      leave  
    1191:       c3                      ret

My question: What is the purpose of the instuctions at addresses 1168 and 116b in gcc output with -O0?:

    1168:       c1 ea 1f                shr    edx,0x1f                      ; ??? why?
    116b:       01 d0                   add    eax,edx                       ; ??? why?

I don't think these two instructions are just noise, but their purpose eludes me (adding sign bit? but why?).

gcc version: 11.4.0, clang version: 14.0.0, target: x86_64-pc-linux-gnu


Solution

  • Both uint16_t values will get promoted to (signed) int when the multiplication is performed. Therefore, your program is invoking undefined behavior due to signed integer overflow.

    However, when compiling with -O0, the compiler is not aware of the overflow, because it is generating code for the line

    uint32_t y = (x * x) / 2;
    

    without being aware of the value of x. So it generates code that should work with any value of x.

    When generating code for that line, the compiler is aware that the expression (x * x) will yield an int. However, it does not know whether this value will be positive or negative. So it must emit instructions which can handle both positive and negative values.

    For a positive value, the most efficient way to divide by 2 is to shift the value right by one bit.

    However, this won't work for negative values, even if an arithmetic shift (which preserves the sign bit) is performed. Assuming that the negative value is in two's complement representation, you must add 1 to the value before performing the arithmetic shift, in order to get the desired value.

    The instruction

    shr    edx,0x1f
    

    extracts the sign bit and stores it in the EDX register. So if the value is negative, the EDX register will have the value 1. Otherwise, it will have the value 0.

    Then, the instruction

    add    eax,edx
    

    adds this sign bit to the result of the multiplication (which is in the EAX register). So for a positive number, it will add 0, and for a negative number, it will add 1. This is more efficient than testing for the sign bit and performing a conditional jump depending on the value of the sign bit.

    After adding the sign bit to the result of the multiplication, the main part of the division can be performed, by performing an arithmetic right shift by one bit.