void insertion_sort_1(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; ++cur) {
int tmp = *cur;
int *pos = cur;
for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
*i = *(i - 1);
pos = i - 1;
}
*pos = tmp;
}
}
void insertion_sort_2(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; ++cur) {
int tmp = *cur;
int *i = cur;
for (; i > begin && *(i - 1) > tmp; --i) {
*i = *(i - 1);
}
*(i-1) = tmp;
}
}
I initially thought the second insertion sort algorithm is faster,but after the experiment it was found that the second insertion sort algorithm is slower! Result:
algorithm | run time |
---|---|
insertion_sort_1 | 2245 ms |
insertion_sort_2 | 2899 ms |
Test code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define SMALL_N 5000
#define MIDDLE_N 100000
#define BIG_N 10000000
__attribute__((constructor))
void __init__Rand__() {
srand(time(0));
}
bool check(int* begin, int* end) {
int* cur = begin;
for(; cur < end - 1; ++cur) {
if(*cur > *(cur + 1)) return false;
}
return true;
}
#define TEST(func, begin, end){\
printf("Test %s : ", #func);\
int *tmp = (int*)malloc(sizeof(int) * (end - begin));\
memcpy(tmp, begin, sizeof(int) * (end - begin));\
long long b = clock();\
func(tmp, tmp - end + begin);\
long long e = clock();\
if(check(tmp, tmp - end + begin)) printf("\tOK");\
else printf("\tWrong");\
printf("\t%lld ms\n", (e - b) * 1000 / CLOCKS_PER_SEC);\
free(tmp);\
}
int *geneArr(unsigned n) {
int* arr = (int*)malloc(sizeof(int) * n);
for(int i = 0; i < n; ++i) {
int tmp = rand() % 10000;
arr[i] = tmp;
}
return arr;
}
void swap(int* a, int* b) {
if(a == b) return;
int c = *a;
*a = *b;
*b = c;
}
// ================================================================================================
void selection_sort(int* begin,int* end) {
for(int* cur = begin; cur < end - 1; ++cur) {
int* minimum = cur;
for(int* cur_find = cur + 1; cur_find != end; ++cur_find) {
if(*cur_find < *minimum) minimum = cur_find;
}
if(minimum != cur) swap(minimum, cur);
}
}
void insertion_sort_1(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; ++cur) {
int tmp = *cur;
int *pos = cur;
for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
*i = *(i - 1);
pos = i - 1;
}
*pos = tmp;
}
}
void insertion_sort_2(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; ++cur) {
int tmp = *cur;
int *i = cur;
for (; i > begin && *(i - 1) > tmp; --i) {
*i = *(i - 1);
}
*(i-1) = tmp;
}
}
int main() {
// int N=SMALL_N;
int N=MIDDLE_N;
// int N=BIG_N;
int* arr = geneArr(N);
TEST(insertion_sort_1, arr, arr + N);
TEST(insertion_sort_2, arr, arr + N);
free(arr);
return 0;
}
In the second algorithm,I have reduce the assignment operator,but it becomes slower.I want to know why?Thanks!
Two defects render the benchmark meaningless:
In the TEST
macro you use tmp - end + begin
to calculate the end
pointer of the tmp
array but it should be tmp + (end - begin)
.
insertion_sort_2()
returns a sorted array but it doesn't contain all the elements of the input array so it's something different than a sorting algorithm. For example here is a run with N=10:
before: 5298, 8764, 9261, 4325, 1305, 2502, 7606, 3141, 8770, 331
after: 8764, 8764, 8764, 8764, 8764, 8764, 8770, 9261, 9261, 9261
The inner loop condition is i > begin && *(i - 1) > tmp
which means when the loop terminates in this case i == begin || *(i - 1) <= tmp
. The statement after the loop *(i-1) = tmp
can and will write before the array which is undefined behavior. It should be *i = tmp;
.
Once you fix those two issues the benchmark seem to run in about the same time when compiled with gcc -O2
:
$ for((i=0; i < 3; i++)); do ./a.out ; done
Test insertion_sort_1 : OK 831 ms
Test insertion_sort_2 : OK 815 ms
Test insertion_sort_1 : OK 947 ms
Test insertion_sort_2 : OK 955 ms
Test insertion_sort_1 : OK 831 ms
Test insertion_sort_2 : OK 867 ms
Other than labels the assembler of the two functions are the same.
I replaced your TEST
macro with the function benchmark()
for ease of debugging, adding missing header and a few error checks:
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define N 100000
bool check(const int* begin, const int* end);
void benchmark(const char *name, void (*f)(int *, int *), int* begin, int* end) {
printf("Test %s : ", name);
size_t n = end - begin;
int *tmp = malloc(sizeof(int) * n);
if(!tmp) {
perror("");
exit(1);
}
memcpy(tmp, begin, sizeof(int) * n);
long long b = clock();
(*f)(tmp, tmp + n);
long long e = clock();
if(check(tmp, tmp + n))
printf("\tOK");
else\
printf("\tWrong");
printf("\t%lld ms\n", (e - b) * 1000 / CLOCKS_PER_SEC);\
free(tmp);
}
bool check(const int* begin, const int* end) {
for(const int* cur = begin; cur < end - 1; ++cur) {
if(*cur > *(cur + 1))
return false;
}
return true;
}
int *geneArr(unsigned n) {
int* arr = malloc(sizeof(int) * n);
if(!arr) {
perror("");
exit(1);
}
for(int i = 0; i < n; ++i)
arr[i] = rand() % 10000;
return arr;
}
void swap(int* a, int* b) {
if(a == b) return;
int c = *a;
*a = *b;
*b = c;
}
void insertion_sort_1(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; cur++) {
int tmp = *cur;
int *pos = cur;
for (int *i = cur; i > begin && *(i - 1) > tmp; --i) {
*i = *(i - 1);
pos = i - 1;
}
*pos = tmp;
}
}
void insertion_sort_2(int *begin, int *end) {
for (int *cur = begin + 1; cur < end; cur++) {
int tmp = *cur;
int *i = cur;
for (; i > begin && *(i - 1) > tmp; --i)
*i = *(i - 1);
*i = tmp;
}
}
int main() {
srand(time(0));
int* arr = geneArr(N);
benchmark("insertion_sort_1", insertion_sort_1, arr, arr + N);
benchmark("insertion_sort_2", insertion_sort_2, arr, arr + N);
free(arr);
}