cperformance

Why these two lines have different performance in a C program?


So I'm writing some program consisting of a simple heap with C. The problem pop up when I'm doing the 'pop' operation part, the whole 'pop' function looks like this——

typedef long long ll;
ll pop(){
    ll res=hp[1];hp[1]=hp[used];
    for(ll x=1,y=2;y<used;){
        // ll w=(((y|1)<used)&&(hp[y|1]>=hp[y]))?(y|1):y;
        ll w=y|(((y|1)<used)&&(hp[y|1]>=hp[y]));
        if(hp[w]<=hp[x])break;
        ll temp=hp[x];hp[x]=hp[w];hp[w]=temp;
        x=w,y=w<<1;
    }
    used--;
    return res;
}

Where 'hp' refers to the heap data, and 'used' is a global var showing the element count of the heap. Now the problem is, I tested the performance of line5 and line6, which I think do the same thing: to create a var 'w' with a certain value, but I found that line5 does much faster than line6(e.g. when repeating 'pop' 20million times, there's a difference of 0.1 seconds or more), and I can't figure out exactly why?

I thought it might take some time to transfer from bool to long long(if there ever exists such transfer), so I tried this below:

#include<stdio.h>
#include<time.h>
typedef __int128_t ll;
#define bool _Bool
int main(){
    int a=clock();
    // ll x=1;
    bool x=1;
    ll y=15415146514465;
    for(ll i=0;i<200000000;i++){
        ll z=x|y;
        y+=z;
    }
    int b=clock();
    printf("%lf",(b-a)/1000.0);
    return 0;
}

and found a slight difference(less than 0.05 seconds when repeating 200million times) if I switch between two types of var 'x'. However, is it that all the performance difference up there is caused by the same problem below(it seems that the type transfer doesn't cost that much)? Is there perhaps another issue I did not notice? Is my assumption even right?


Solution

  • Simply, the compiler is not compiling these lines the same way when you optimize. So the performance is slightly different.

    typedef long long ll;
    
    ll pop(ll *hp, size_t used, ll y)
    {
        ll w=(((y|1)<used)&&(hp[y|1]>=hp[y]))?(y|1):y;
        return w;
    }
    
    ll pop1(ll *hp, size_t used, ll y)
    {
        ll w=y|(((y|1)<used)&&(hp[y|1]>=hp[y]));
        return w;
    }
    
    pop:
            mov     rax, rdx
            or      rdx, 1
            cmp     rdx, rsi
            jnb     .L2
            mov     rcx, QWORD PTR [rdi+rax*8]
            cmp     QWORD PTR [rdi+rdx*8], rcx
            cmovge  rax, rdx
    .L2:
            ret
    pop1:
            mov     rax, rdx
            or      rdx, 1
            cmp     rdx, rsi
            jnb     .L6
            mov     rcx, QWORD PTR [rdi+rax*8]
            cmp     QWORD PTR [rdi+rdx*8], rcx
            setge   dl
            movzx   edx, dl
            or      rax, rdx
    .L6:
            ret
    
    

    https://godbolt.org/z/q7dK1Kqbs

    https://godbolt.org/z/za866szvq

    As a side note: your code is a perfect example of how not to name variables and structure code. It is completely unreadable.