calgorithminsertion-sort

Which of the following insertion sort algorithms do you think is faster?


void insertion_sort_1(int *begin, int *end) {
    for (int *cur = begin + 1; cur < end; ++cur) {
        int tmp = *cur;
        int *pos = cur;
        for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
            *i = *(i - 1);
            pos = i - 1;
        }
        *pos = tmp;
    }
}

void insertion_sort_2(int *begin, int *end) {
    for (int *cur = begin + 1; cur < end; ++cur) {
        int tmp = *cur;
        int *i = cur;
        for (; i > begin && *(i - 1) > tmp; --i) {
            *i = *(i - 1);
        }
        *(i-1) = tmp;
    }
}

I initially thought the second insertion sort algorithm is faster,but after the experiment it was found that the second insertion sort algorithm is slower! Result:

algorithm run time
insertion_sort_1 2245 ms
insertion_sort_2 2899 ms

Test code:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h> 

#define SMALL_N 5000
#define MIDDLE_N 100000
#define BIG_N 10000000

__attribute__((constructor))
void __init__Rand__() {
    srand(time(0));
}

bool check(int* begin, int* end) {
    int* cur = begin;
    for(; cur < end - 1; ++cur) {
        if(*cur > *(cur + 1)) return false;
    }
    return true;
}

#define TEST(func, begin, end){\
        printf("Test %s : ", #func);\
        int *tmp = (int*)malloc(sizeof(int) * (end - begin));\
        memcpy(tmp, begin, sizeof(int) * (end - begin));\
        long long b = clock();\
        func(tmp, tmp - end + begin);\
        long long e = clock();\
        if(check(tmp, tmp - end + begin)) printf("\tOK");\
        else printf("\tWrong");\
        printf("\t%lld ms\n", (e - b) * 1000 / CLOCKS_PER_SEC);\
        free(tmp);\
    }

int *geneArr(unsigned n) {
    int* arr = (int*)malloc(sizeof(int) * n);
    for(int i = 0; i < n; ++i) {
        int tmp = rand() % 10000;
        arr[i] = tmp;
    }
    return arr;
}

void swap(int* a, int* b) {
    if(a == b) return;
    int c = *a;
    *a = *b;
    *b = c;
}

// ================================================================================================

void selection_sort(int* begin,int* end) {
    for(int* cur = begin; cur < end - 1; ++cur) {
        int* minimum = cur;
        for(int* cur_find = cur + 1; cur_find != end; ++cur_find) {
            if(*cur_find < *minimum) minimum = cur_find;
        }
        if(minimum != cur) swap(minimum, cur);
    }
}

void insertion_sort_1(int *begin, int *end) {
    for (int *cur = begin + 1; cur < end; ++cur) {
        int tmp = *cur;
        int *pos = cur;
        for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
            *i = *(i - 1);
            pos = i - 1;
        }
        *pos = tmp;
    }
}

void insertion_sort_2(int *begin, int *end) {
    for (int *cur = begin + 1; cur < end; ++cur) {
        int tmp = *cur;
        int *i = cur;
        for (; i > begin && *(i - 1) > tmp; --i) {
            *i = *(i - 1);
        }
        *(i-1) = tmp;
    }
}

int main() {
//  int N=SMALL_N;
    int N=MIDDLE_N;
//  int N=BIG_N;
    int* arr = geneArr(N);

    TEST(insertion_sort_1, arr, arr + N);
    TEST(insertion_sort_2, arr, arr + N);

    free(arr); 
    return 0;
}

In the second algorithm,I have reduce the assignment operator,but it becomes slower.I want to know why?Thanks!


Solution

  • Two defects render the benchmark meaningless:

    1. In the TEST macro you use tmp - end + begin to calculate the end pointer of the tmp array but it should be tmp + (end - begin).

    2. insertion_sort_2() returns a sorted array but it doesn't contain all the elements of the input array so it's something different than a sorting algorithm. For example here is a run with N=10:

      before: 5298, 8764, 9261, 4325, 1305, 2502, 7606, 3141, 8770, 331
      after:  8764, 8764, 8764, 8764, 8764, 8764, 8770, 9261, 9261, 9261 
      

      The inner loop condition is i > begin && *(i - 1) > tmp which means when the loop terminates in this case i == begin || *(i - 1) <= tmp. The statement after the loop *(i-1) = tmp can and will write before the array which is undefined behavior. It should be *i = tmp;.

    Once you fix those two issues the benchmark seem to run in about the same time when compiled with gcc -O2:

    $ for((i=0; i < 3; i++)); do ./a.out ; done
    Test insertion_sort_1 :         OK      831 ms
    Test insertion_sort_2 :         OK      815 ms
    Test insertion_sort_1 :         OK      947 ms
    Test insertion_sort_2 :         OK      955 ms
    Test insertion_sort_1 :         OK      831 ms
    Test insertion_sort_2 :         OK      867 ms
    

    Other than labels the assembler of the two functions are the same.

    I replaced your TEST macro with the function benchmark() for ease of debugging, adding missing header and a few error checks:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    
    #define N 100000
    
    bool check(const int* begin, const int* end);
    
    void benchmark(const char *name, void (*f)(int *, int *), int* begin,  int* end) {
        printf("Test %s : ", name);
        size_t n = end - begin;
        int *tmp = malloc(sizeof(int) * n);
        if(!tmp) {
            perror("");
            exit(1);
        }
        memcpy(tmp, begin, sizeof(int) * n);
        long long b = clock();
        (*f)(tmp, tmp + n);
        long long e = clock();
        if(check(tmp, tmp + n))
            printf("\tOK");
        else\
            printf("\tWrong");
        printf("\t%lld ms\n", (e - b) * 1000 / CLOCKS_PER_SEC);\
        free(tmp);
    }
    
    bool check(const int* begin, const int* end) {
        for(const int* cur = begin; cur < end - 1; ++cur) {
            if(*cur > *(cur + 1))
                return false;
        }
        return true;
    }
    
    int *geneArr(unsigned n) {
        int* arr = malloc(sizeof(int) * n);
        if(!arr) {
            perror("");
            exit(1);
        }
        for(int i = 0; i < n; ++i)
            arr[i] = rand() % 10000;
        return arr;
    }
    
    void swap(int* a, int* b) {
        if(a == b) return;
        int c = *a;
        *a = *b;
        *b = c;
    }
    
    void insertion_sort_1(int *begin, int *end) {
        for (int *cur = begin + 1; cur < end; cur++) {
            int tmp = *cur;
            int *pos = cur;
            for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
                *i = *(i - 1);
                pos = i - 1;
            }
            *pos = tmp;
        }
    }
    
    void insertion_sort_2(int *begin, int *end) {
        for (int *cur = begin + 1; cur < end; cur++) {
            int tmp = *cur;
            int *i = cur;
            for (; i > begin && *(i - 1) > tmp; --i)
                *i = *(i - 1);
            *i = tmp;
        }
    }
    
    int main() {
        srand(time(0));
        int* arr = geneArr(N);
        benchmark("insertion_sort_1", insertion_sort_1, arr, arr + N);
        benchmark("insertion_sort_2", insertion_sort_2, arr, arr + N);
        free(arr);
    }