calgorithmsortingmergesort

Implementation issue with merge sort


I used the same algorithm in Introduction to Algorithms, Third Edition but it is not working very well, its sorts only the first 4 numbers in the array.

code:

#include <stdio.h>

void sortArr(int *nums, int arrSize) {
    // nums[start...end]
    // nums[start...mid]  n1
    // nums[mid+1...end]  n2
    int start, mid, end;
    start = 0;
    end = arrSize-1;
    mid = (end + start) / 2;
    int n1, n2;
    n1 = mid - start + 1;
    n2 = end - mid;
    int l[n1], r[n2];
    for (int i = 0; i < n1; i++) {
        l[i] = nums[start + i];
    }

    for (int i = 0; i < n2; i++) {
        r[i] = nums[mid + 1 + i];
    }

    int i, j;
    i = 0;
    j = 0;
    for (int k = start; k < arrSize; k++) {
        if (l[i] <= r[j]) {
            nums[k] = l[i];
            i++;
        } else {
            nums[k] = r[j];
            j++;
        }
    }
}

Solution

  • There's quite a few problems with your code, including the fact that you've only implemented MERGE from the book (a subroutine for MERGESORT), and the book uses a trick of appending +infinity to the "halves" of the array to avoid having code that handles when one of the halves is used up when merging. You've not included that trick (eg: by appending INT_MAX to L and R and requiring that the original array didn't include that value), but you haven't replaced it with anything so your code can easily read out of bounds when merging.

    Here's a working version based on the book algorithms. Compared to the code in the question, it also avoids the VLAs (variable length arrays) which are likely to fail on large input arrays using a one-time malloc-ed buffer, and uses the more correct size_t for array indexes.

    The code adds some extra tests in the if statement for li and ri when merging which is a different way than the book's +infinity trick, and works better in C.

    The top-level function MERGESORT returns 1 if successful, and 0 if unsuccessful (which can happen if malloc fails). Assertions are used to check assumptions about the various indexes and sizes -- these should fail only if there's a bug in the code and can be compiled out.

    It runs on a random array of a configurable size (currently 1234), and prints out ok or failed at the end depending whether the array is actually sorted. (Note: rand is normally to be avoided because it's usually a poor supply of random numbers, but it's fine here).

    Note this code is carefully written, but it's still not very well tested so you may find bugs!

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>
    
    void MERGE(int *nums, int *buffer, size_t start, size_t mid, size_t end) {
        size_t n1 = mid - start + 1;
        size_t n2 = end - mid;
        assert(n1 > 0);
        assert(n2 > 0);
        memcpy(buffer, nums + start, (end - start + 1) * sizeof(int));
        int *L = buffer;
        int *R = buffer + n1;
        size_t li = 0, ri = 0;
        for (size_t i = start; i <= end; i++ ){
            if (li < n1 && (ri == n2 || L[li] <= R[ri])) {
                assert(li < n1);
                nums[i] = L[li++];
            } else {
                assert(ri < n2);
                nums[i] = R[ri++];
            }
        }
    }
    
    void MERGESORT0(int *nums, int *buffer, size_t start, size_t end) {
        if (end == start) return;
        size_t mid = start + (end - start) / 2;
        MERGESORT0(nums, buffer, start, mid);
        MERGESORT0(nums, buffer, mid+1, end);
        MERGE(nums, buffer, start, mid, end);
    }
    
    int MERGESORT(int *nums, size_t size) {
        if (size == 0) {
            return 1;
        }
        int *buffer = malloc(sizeof(int) * size);
        if (!buffer) return 0;
        MERGESORT0(nums, buffer, 0, size-1);
        free(buffer);
        return 1;
    }
    
    #define N 1234
    int main(){
        int arr[N];
        for (size_t i = 0; i < N; i++) {
            arr[i] = rand();
        }
        size_t arrsize = sizeof(arr)/sizeof(arr[0]);
        printf("before sorting: \n");
        for(size_t i=0; i<arrsize; i++){
            printf("%d ", arr[i]);
        }
        printf("\n");
        if (!MERGESORT(arr, arrsize)) {
            printf("failed\n");
            exit(1);
        }
        printf("after sorting: \n");
        int failed = 0;
        for(size_t i=0; i<arrsize; i++){
            if (i > 0 && arr[i] < arr[i-1]) failed = 1;
            printf("%d ", arr[i]);
        }
        printf("\n");
        printf("%s\n", failed ? "failed" : "ok");
        return 0;
    }