When I enter more than 250 elements When I run the program without inserting Natural Merge Sort code, my program can run with 1000000 elements, but after I inserted Natural Merge Sort, my program can only run exactly with 250 elements. That means when I entered elements >250, my program just showed nothing and did not run anymore. The program should be like this Here is my source code;
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
int min(int x, int y) {
return (x < y) ? x : y;
}
// Natural Merge Sort
int SoDuongChay(int A[], int n) {
int dem = 0, i = 0;
while (i < n) {
while (A[i] < A[i + 1] && i < n - 1)
i++;
dem++;
i++;
}
return dem;
}
void MergeNatural(int A[], int B[], int C[], int *nB, int *nC, int *nA) {
int p = 0, pb = 0, pc = 0, ib = 0, ic = 0, kb, kc;
while ((*nB > 0) && (*nC > 0)) {
kb = min(*nB, 1000000);
kc = min(*nC, 1000000);
if (B[pb + ib] <= C[pc + ic]) {
A[(*nA)++] = B[pb + ib];
ib++;
if (ib == kb) {
for (; ic < kc; ic++)
A[(*nA)++] = C[pc + ic];
pb += kb;
pc += kc;
ib = ic = 0;
*nB -= kb;
*nC -= kc;
}
} else {
A[(*nA)++] = C[pc + ic];
ic++;
if (ic == kc) {
for (; ib < kb; ib++)
A[(*nA)++] = B[pb + ib];
pb += kb;
pc += kc;
ib = ic = 0;
*nB -= kb;
*nC -= kc;
}
}
}
while (*nB > 0) {
A[(*nA)++] = B[pb++];
(*nB)--;
}
while (*nC > 0) {
A[(*nA)++] = C[pc++];
(*nC)--;
}
*nB = 0;
*nC = 0;
}
void DistributeCombine(int A[], int B[], int C[], int n) {
int i = 0, dem = 0, nA = 0;
int nB = 0, nC = 0;
while (i < n) {
int temp = i;
while (A[i] < A[i + 1] && i < n - 1)
i++;
if (dem % 2 == 0)
for (int j = temp; j <= i; j++)
B[nB++] = A[j];
else {
for (int j = temp; j <= i; j++)
C[nC++] = A[j];
MergeNatural(A, B, C, &nB, &nC, &nA);
}
i++;
dem++;
}
if (nB != 0)
for (int k = 0; k < nB; k++)
A[nA++] = B[k];
}
void NaturalMergeSort(int A[], int B[], int C[], int n) {
while (SoDuongChay(A, n) >= 2)
DistributeCombine(A, B, C, n);
}
//Merge Sort
void Distribute(int a[], int n, int *pb, int *pc, int k, int b[], int c[]) {
*pb = 0;
*pc = 0;
int i = 0;
int j;
while (i < n) {
for (j = 0; (j < k) && (i < n); j++) {
b[(*pb)++] = a[i++];
}
for (j = 0; (j < k) && (i < n); j++) {
c[(*pc)++] = a[i++];
}
}
}
void Merge(int a[], int nb, int nc, int k, int b[], int c[]) {
int p, pb, pc, ib, ic, kb, kc;
p = pb = pc = 0;
ib = ic = 0;
while ((0 < nb) && (0 < nc)) {
kb = min(k, nb);
kc = min(k, nc);
if (b[pb + ib] <= c[pc + ic]) {
a[p++] = b[pb + ib];
ib++;
if (ib == kb) {
for (; ic < kc; ic++)
a[p++] = c[pc + ic];
pb += kb;
pc += kc;
ib = ic = 0;
nb -= kb;
nc -= kc;
}
} else {
a[p++] = c[pc + ic];
ic++;
if (ic == kc) {
for (; ib < kb; ib++)
a[p++] = b[pb + ib];
pb += kb;
pc += kc;
ib = ic = 0;
nb -= kb;
nc -= kc;
}
}
}
while (nb > 0) {
a[p++] = b[pb++];
nb--;
}
while (nc > 0) {
a[p++] = c[pc++];
nc--;
}
}
void MergeSort(int a[], int n) {
int length = 1000000;
int *b = (int *)malloc(length * sizeof(int));
int *c = (int *)malloc(length * sizeof(int));
int pb, pc;
int k = 1;
do {
Distribute(a, n, &pb, &pc, k, b, c);
Merge(a, pb, pc, k, b, c);
k *= 2;
} while (k < n);
}
//quickSort
int sumArray(int a[], int n) {
if (n <= 0) {
return 0;
} else {
return a[n-1] + sumArray(a, n - 1);
}
}
void swap(int *a, int *b) {
int temp = *a;
*a = *b;
*b = temp;
}
int partition(int a[], int left, int right) {
int pivot = a[(left + right) / 2];
int i = left - 1;
int j = right + 1;
while (1) {
do {
i++;
} while(a[i] < pivot);
do {
j--;
} while(a[j] > pivot);
if (i < j)
swap(&a[i], &a[j]);
else
return j;
}
}
void QuickSort(int a[], int left, int right) {
if (left >= right)
return;
int p = partition(a, left, right);
QuickSort(a, left, p);
QuickSort(a, p + 1, right);
}
void printArray(int a[], int n) {
int i;
for (i = 0; i < n; i++)
printf("%d ", a[i]);
printf("\n");
}
int main() {
int i, n;
int length = 1000000;
int *a = (int *)malloc(length * sizeof(int));
int *a1 = (int *)malloc(length * sizeof(int));
int *a2 = (int *)malloc(length * sizeof(int));
int *b = (int *)malloc(length * sizeof(int));
int *c = (int *)malloc(length * sizeof(int));
srand(time(NULL));
printf("\nEnter the number of elements: ");
scanf("%d", &n);
for (i = 0; i < n; i++)
a[i] = rand() % 1000000;
for (i = 0; i < n; i++)
a1[i] = a[i];
for (i = 0; i < n; i++)
a2[i] = a[i];
struct timespec start, end;
float NaturalMergeSort_time, MergeSort_time, QuickSort_time;
clock_gettime(CLOCK_MONOTONIC, &start);
NaturalMergeSort(a, b, c, n);
clock_gettime(CLOCK_MONOTONIC, &end);
NaturalMergeSort_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000.0;
//
clock_gettime(CLOCK_MONOTONIC, &start);
MergeSort(a1, n);
clock_gettime(CLOCK_MONOTONIC, &end);
MergeSort_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000.0;
//
clock_gettime(CLOCK_MONOTONIC, &start);
QuickSort(a2, 0, n - 1);
clock_gettime(CLOCK_MONOTONIC, &end);
QuickSort_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000.0;
printf("\nCompare table:\n");
printf("------------------------------------------------------\n");
printf("| Algorithm | Time (microseconds) |\n");
printf("------------------------------------------------------\n");
printf("| Natural Merge Sort | %.2f |\n", NaturalMergeSort_time);
printf("| Merge Sort | %.2f |\n", MergeSort_time);
printf("| Quick Sort | %.2f |\n", QuickSort_time);
printf("------------------------------------------------------\n");
free(a);
free(a1);
free(a2);
free(b);
free(c);
return 0;
}
I want to know what's wrong with my Natural Merge Sort code or my declaration.
The reason your NaturalMergeSort
function fails for sufficiently large sets is it cannot handle duplicate values. The loop while (A[i] < A[i + 1] && i < n - 1)
has 2 issues:
i < n - 1
should be performed before accessing A[i + 1]
andA[i] < A[i + 1]
should use <=
instead of <
As coded, any duplicate values will create separate runs so the function SoDuongChay
never returns 1
, so the natural merge sort never finishes and the program is stuck with no output. Note that the function DistributeCombine
has the same flaw and calls MergeNatural
with sets that are already merged but this extra work has no effect.Note also these remarks:
kb
as kb = min(*nB, 1000000)
seems useless and will cause problems for very large sets. You should just set kb = *nB;
MergeNatural
is too complex. Updating the index variables from the caller is needlessly complicated and difficult to follow. A simpler approach is recommended.SoDuongChay
causes extra scans of the data set. DistributeCombine
could return the number of merged sets and NaturalMergeSort
would continue calling it as long as the return value is greater than 1.NaturalMergeSort
should allocate and free its ancillary arrays and QuickSort
should be a wrapper that calls its own recursive ancillary function.MergeSort
does not work for sets larger than 1 million items because you allocate temporary arrays with a fixed length of 1 million. Use n
instead.b
and c
are not freed in MergeSort()
.Here is a modified version with a simpler and smaller natural merge sort functions:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <time.h>
int min(int x, int y) {
return (x < y) ? x : y;
}
// Natural Merge Sort
int SoDuongChay(int A[], int n) {
int dem = 0, i = 0;
while (i < n) {
while (i < n - 1 && A[i] <= A[i + 1])
i++;
dem++;
i++;
}
return dem;
}
void MergeNatural(int A[], int B[], int C[], int *nB, int *nC, int *nA) {
int pb = 0, pc = 0, ib = 0, ic = 0, kb, kc;
while ((*nB > 0) && (*nC > 0)) {
kb = *nB;//min(*nB, 1000000);
kc = *nC;//min(*nC, 1000000);
if (B[pb + ib] <= C[pc + ic]) {
A[(*nA)++] = B[pb + ib];
ib++;
if (ib == kb) {
for (; ic < kc; ic++)
A[(*nA)++] = C[pc + ic];
pb += kb;
pc += kc;
ib = ic = 0;
*nB -= kb;
*nC -= kc;
}
} else {
A[(*nA)++] = C[pc + ic];
ic++;
if (ic == kc) {
for (; ib < kb; ib++)
A[(*nA)++] = B[pb + ib];
pb += kb;
pc += kc;
ib = ic = 0;
*nB -= kb;
*nC -= kc;
}
}
}
while (*nB > 0) {
A[(*nA)++] = B[pb++];
(*nB)--;
}
while (*nC > 0) {
A[(*nA)++] = C[pc++];
(*nC)--;
}
*nB = 0;
*nC = 0;
}
void DistributeCombine(int A[], int B[], int C[], int n) {
int i = 0, dem = 0, nA = 0, nB = 0, nC = 0;
while (i < n) {
int temp = i;
while (i < n - 1 && A[i] <= A[i + 1])
i++;
if (dem % 2 == 0) {
for (int j = temp; j <= i; j++)
B[nB++] = A[j];
} else {
for (int j = temp; j <= i; j++)
C[nC++] = A[j];
MergeNatural(A, B, C, &nB, &nC, &nA);
}
i++;
dem++;
}
if (nB != 0) {
for (int j = 0; j < nB; j++)
A[nA++] = B[j];
}
}
void NaturalMergeSort(int A[], int n) {
int *B = (int *)malloc(n * sizeof(int));
int *C = (int *)malloc(n * sizeof(int));
while (SoDuongChay(A, n) >= 2) {
DistributeCombine(A, B, C, n);
}
free(B);
free(C);
}
// Simpler Natural Merge Sort
void MyMergeNatural(int A[], int B[], int C[], int nB, int nC, int nA) {
int pb = 0, pc = 0;
while (pb < nB && pc < nC) {
if (B[pb] <= C[pc])
A[nA++] = B[pb++];
else
A[nA++] = C[pc++];
}
while (pb < nB) {
A[nA++] = B[pb++];
}
while (pc < nC) {
A[nA++] = C[pc++];
}
}
int MyDistributeCombine(int A[], int B[], int C[], int n) {
int i = 0, runs = 0;
while (i < n) {
int nA = i;
int nB = 0;
while (i < n - 1 && A[i] <= A[i + 1]) {
B[nB++] = A[i++];
}
B[nB++] = A[i++];
runs++;
if (i >= n)
break;
int nC = 0;
while (i < n - 1 && A[i] <= A[i + 1]) {
C[nC++] = A[i++];
}
C[nC++] = A[i++];
MyMergeNatural(A, B, C, nB, nC, nA);
}
return runs;
}
void MyNaturalMergeSort(int A[], int n) {
int *B = (int *)malloc(n * sizeof(int));
int *C = (int *)malloc(n * sizeof(int));
while (MyDistributeCombine(A, B, C, n) > 1)
continue;
free(B);
free(C);
}
// Alternative using a single extra array
void MyMergeNatural2(int A[], int B[], int nB, int pc, int nC, int nA) {
int pb = 0;
while (pb < nB && pc < nC) {
if (B[pb] <= A[pc])
A[nA++] = B[pb++];
else
A[nA++] = A[pc++];
}
while (pb < nB) {
A[nA++] = B[pb++];
}
}
int MyDistributeCombine2(int A[], int B[], int n) {
int i = 0, runs = 0;
while (i < n) {
int nA = i;
int nB = 0;
while (i < n - 1 && A[i] <= A[i + 1]) {
B[nB++] = A[i++];
}
B[nB++] = A[i++];
runs++;
if (i >= n)
break;
int temp = i;
while (i < n - 1 && A[i] <= A[i + 1])
i++;
i++;
MyMergeNatural2(A, B, nB, temp, i, nA);
}
return runs;
}
void MyNaturalMergeSort2(int A[], int n) {
int *B = (int *)malloc(n * sizeof(int));
while (MyDistributeCombine2(A, B, n) > 1)
continue;
free(B);
}
//Merge Sort
void Distribute(int a[], int n, int *pb, int *pc, int k, int b[], int c[]) {
*pb = 0;
*pc = 0;
int i = 0;
int j;
while (i < n) {
for (j = 0; (j < k) && (i < n); j++) {
b[(*pb)++] = a[i++];
}
for (j = 0; (j < k) && (i < n); j++) {
c[(*pc)++] = a[i++];
}
}
}
void Merge(int a[], int nb, int nc, int k, int b[], int c[]) {
int p, pb, pc, ib, ic, kb, kc;
p = pb = pc = 0;
ib = ic = 0;
while ((0 < nb) && (0 < nc)) {
kb = min(k, nb);
kc = min(k, nc);
if (b[pb + ib] <= c[pc + ic]) {
a[p++] = b[pb + ib];
ib++;
if (ib == kb) {
for (; ic < kc; ic++)
a[p++] = c[pc + ic];
pb += kb;
pc += kc;
ib = ic = 0;
nb -= kb;
nc -= kc;
}
} else {
a[p++] = c[pc + ic];
ic++;
if (ic == kc) {
for (; ib < kb; ib++)
a[p++] = b[pb + ib];
pb += kb;
pc += kc;
ib = ic = 0;
nb -= kb;
nc -= kc;
}
}
}
while (nb > 0) {
a[p++] = b[pb++];
nb--;
}
while (nc > 0) {
a[p++] = c[pc++];
nc--;
}
}
void MergeSort(int a[], int n) {
if (n > 1) {
int *b = (int *)malloc(n * sizeof(int));
int *c = (int *)malloc(n * sizeof(int));
int pb, pc;
int k = 1;
do {
Distribute(a, n, &pb, &pc, k, b, c);
Merge(a, pb, pc, k, b, c);
k *= 2;
} while (k < n);
free(b);
free(c);
}
}
//quickSort
void swap(int *a, int *b) {
int temp = *a;
*a = *b;
*b = temp;
}
int partition(int a[], int left, int right) {
int pivot = a[(left + right) / 2];
int i = left - 1;
int j = right + 1;
while (1) {
do {
i++;
} while(a[i] < pivot);
do {
j--;
} while(a[j] > pivot);
if (i < j)
swap(&a[i], &a[j]);
else
return j;
}
}
void QuickSort1(int a[], int left, int right) {
if (left < right) {
int p = partition(a, left, right);
QuickSort1(a, left, p);
QuickSort1(a, p + 1, right);
}
}
void QuickSort(int a[], int n) {
if (n > 1) {
QuickSort1(a, 0, n - 1);
}
}
int CheckSort(const int a[], int n, const char *name) {
for (int i = 1; i < n; i++) {
if (a[i - 1] > a[i]) {
printf("%s failed: a[%d] = %d > %d = a[%d]\n",
name, i - 1, a[i - 1], a[i], i);
return 1;
}
}
return 0;
}
void printArray(int a[], int n) {
for (int i = 0; i < n; i++)
printf("%d ", a[i]);
printf("\n");
}
int main(int argc, char *argv[]) {
int n;
srand(time(NULL));
if (argc > 1) {
n = strtoul(argv[1], NULL, 0);
} else {
printf("\nEnter the number of elements: ");
if (scanf("%d", &n) != 1)
return 1;
}
struct {
void (*sort)(int *a, int n);
const char *name;
int check;
double time;
} test[] = {
{ NaturalMergeSort, "Natural Merge Sort", 0, 0 },
{ MergeSort, "Merge Sort", 0, 0 },
{ QuickSort, "Quick Sort", 0, 0 },
{ MyNaturalMergeSort, "Simpler Natural Merge Sort", 0, 0 },
{ MyNaturalMergeSort2, "Smaller Natural Merge Sort", 0, 0 },
};
int *a0 = (int *)malloc(n * sizeof(int));
int *a1 = (int *)malloc(n * sizeof(int));
for (int i = 0; i < n; i++) {
a0[i] = rand() % 1000000;
}
int status = 0;
for (size_t t = 0; t < sizeof(test) / sizeof(test[0]); t++) {
struct timespec start, end;
for (int i = 0; i < n; i++)
a1[i] = a0[i];
clock_gettime(CLOCK_MONOTONIC, &start);
test[t].sort(a1, n);
clock_gettime(CLOCK_MONOTONIC, &end);
test[t].check = CheckSort(a1, n, test[t].name);
status |= test[t].check;
test[t].time = (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_nsec - start.tv_nsec) / 1000000.0;
}
printf("Sorting times for %d entries\n", n);
printf("--------------------------------------------\n");
printf("| Algorithm | Time |\n");
printf("--------------------------------------------\n");
for (size_t t = 0; t < sizeof(test) / sizeof(test[0]); t++) {
if (!test[t].check)
printf("| %-26s | %8.3f ms |\n", test[t].name, test[t].time);
}
printf("--------------------------------------------\n");
free(a0);
free(a1);
return status;
}
Output:
Sorting times for 10000000 entries
--------------------------------------------
| Algorithm | Time |
--------------------------------------------
| Natural Merge Sort | 1928.621 ms |
| Merge Sort | 1601.156 ms |
| Quick Sort | 1094.788 ms |
| Simpler Natural Merge Sort | 1099.796 ms |
| Smaller Natural Merge Sort | 1025.388 ms |
--------------------------------------------