c++carrayspointersoptimization

Is it faster to iterate through the elements of an array with pointers incremented by 1?


Is it faster to do something like

for ( int * pa(arr), * pb(arr+n); pa != pb; ++pa )
{ 
   // do something with *pa
}

than

for ( size_t k = 0; k < n; ++k )
{ 
   // do something with arr[k]
}

???

I understand that arr[k] is equivalent to *(arr+k), but in the first method you are using the current pointer which has incremented by 1, while in the second case you are using a pointer which is incremented from arr by successively larger numbers. Maybe hardware has special ways of incrementing by 1 and so the first method is faster? Or not? Just curious. Hope my question makes sense.


Solution

  • If the compiler is smart enought (and most of compilers is) then performance of both loops should be ~equal.

    For example I have compiled the code in gcc 5.1.0 with generating assembly:

    int __attribute__ ((noinline)) compute1(int* arr, int n)
    {
      int sum = 0;
      for(int i = 0; i < n; ++i)
      {
        sum += arr[i];
      }
      return sum;
    }
    
    int __attribute__ ((noinline)) compute2(int* arr, int n)
    {
      int sum = 0;
      for(int * pa(arr), * pb(arr+n); pa != pb; ++pa)
      {
        sum += *pa;
      }
      return sum;
    }
    

    And the result assembly is:

    compute1(int*, int):
        testl   %esi, %esi
        jle .L4
        leal    -1(%rsi), %eax
        leaq    4(%rdi,%rax,4), %rdx
        xorl    %eax, %eax
    .L3:
        addl    (%rdi), %eax
        addq    $4, %rdi
        cmpq    %rdx, %rdi
        jne .L3
        rep ret
    .L4:
        xorl    %eax, %eax
        ret
    compute2(int*, int):
        movslq  %esi, %rsi
        xorl    %eax, %eax
        leaq    (%rdi,%rsi,4), %rdx
        cmpq    %rdx, %rdi
        je  .L10
    .L9:
        addl    (%rdi), %eax
        addq    $4, %rdi
        cmpq    %rdi, %rdx
        jne .L9
        rep ret
    .L10:
        rep ret
    main:
        xorl    %eax, %eax
        ret
    

    As you can see, the most heavy part (loop) of both functions is equal:

    .L9:
        addl    (%rdi), %eax
        addq    $4, %rdi
        cmpq    %rdi, %rdx
        jne .L9
        rep ret
    

    But in more complex examples or in other compiler the results might be different. So you should test it and measure, but most of compilers generate similar code.

    The full code sample