c++arraysbinary-searchmergesortlinear-search

Binary search taking more time than linear search


I was recently studying about binary and linear search, and decided to check for real what is the difference in the actual time taken by both of these searching algorithms. Here is my code:

#include <bits/stdc++.h>

using namespace std;

void Print(int arr[], int size) {
    for (int i = 0; i < size; i++)
        cout << arr[i] << " ";
    cout << endl;
}

void Merge_sort(int arr[], int start, int end) {
    
    if (start >= end) {
        return;
    }
    
    int mid = (start + end) / 2;
    Merge_sort(arr, start, mid);
    Merge_sort(arr, mid + 1, end);
    
    int size = end - start + 1;
    
    int arr_new[size];
    
    int i = start, j = mid + 1;
    
    for (int k = 0; k < size; k++) {
        if (i > mid) {
            arr_new[k] = arr[j++];
        } else if (j > end) {
            arr_new[k] = arr[i++];
        } else if (arr[i] < arr[j]) {
            arr_new[k] = arr[i++];
        } else {
            arr_new[k] = arr[j++];
        }
    }
    int index = start;
    for (int k = 0; k < size; k++) {
        arr[index++] = arr_new[k];
    }
}

int binary_search(int arr[], int start, int end, int x) {
    if (start > end) {
        return -1;
    }
    
    int mid = (start + end) / 2;
    if (arr[mid] == x) {
        return mid;
    } else if (arr[mid] > x) {
        return binary_search(arr, start, mid - 1, x);
    } else {
        return binary_search(arr, mid + 1, end, x);
    }
}

int linear_search(int arr[], int size, int x) {
    for (int i = 0; i < size; i++) {
        if (arr[i] == x) {
            return i;
        }
    }
    return -1;
}

int main() {
    int size;
    
    clock_t start, start1, end, end1;
    double time1, time2;
    
    cin >> size;
    
    int *arr1 = new int[size];
    int *arr2 = new int[size];
    
    for (int i = 0; i < size; i++) {
        int j = rand() % 1000;
        arr1[i] = i;
        arr2[i] = i;
    }
    
    Merge_sort(arr1, 0, size - 1);
    start = clock();
    cout << binary_search(arr1, 0, size - 1, 15) << endl;
    end = clock();
    time1 = (double)(end - start) / (double)(CLOCKS_PER_SEC);
    cout << "Time taken by binary search  is : \t" << fixed
        << time1 << setprecision(8);
    cout << " sec " << endl;
    
    start1 = clock();
    cout << linear_search(arr2, size, 15) << endl;
    end1 = clock();
    time2 = (double)(end1 - start1) / (double)(CLOCKS_PER_SEC);
    cout << "Time taken by linear search  is : \t" << fixed
        << time2 << setprecision(8);
    cout << " sec " << endl;
    return 0;
}

I have used Merge sort to sort the array for the binary search part, but have excluded it while calculating the time taken. Kindly take a look and tell me where am I going wrong. As mentioned in a lot of comments, I am adding one result I got on my online IDE.

size = 10000, linear = 0.000005 sec, binary = 0.00005 sec

Solution

  • The problem is very simple and a good C++ compiler points to it immediately with an adequate warning level:

    c++ -O3 -Weverything -o binsearch binsearch.cpp
    binsearch.cpp:81:13: warning: unused variable 'j' [-Wunused-variable]
            int j = rand() % 1000;
    

    This warning points to a major problem in this loop in the main() function:

        for (int i = 0; i < size; i++) {
            int j = rand() % 1000;
            arr1[i] = i;
            arr2[i] = i;
        }
    

    rand() is not used and both arrays are initialized to the same monotonic sequence from 0 to size-1.

    When you measure the time it takes to find the value 15, the result will likely favor linear search because 15 appears in 16th position in both arrays, very close to the beginning, hence it is found very quickly in a linear search and with approximately the same number of steps for binary search in a 10000 element sorted array.

    There are more problems:

    Here is a modified version with a benchmark for various sizes:

    #include <bits/stdc++.h>
    
    using namespace std;
    
    static void Merge_sort(int arr[], int start, int end) {
    
        if (start >= end) {
            return;
        }
    
        int mid = start + (end - start) / 2;
        Merge_sort(arr, start, mid);
        Merge_sort(arr, mid + 1, end);
    
        int size = end - start + 1;
        int *arr_new = new int[size];
        int i = start, j = mid + 1;
    
        for (int k = 0; k < size; k++) {
            if (i > mid) {
                arr_new[k] = arr[j++];
            } else if (j > end) {
                arr_new[k] = arr[i++];
            } else if (arr[i] < arr[j]) {
                arr_new[k] = arr[i++];
            } else {
                arr_new[k] = arr[j++];
            }
        }
        int index = start;
        for (int k = 0; k < size; k++) {
            arr[index++] = arr_new[k];
        }
        delete[] arr_new;
    }
    
    static int binary_search(int arr[], int start, int end, int x) {
        if (start > end) {
            return -1;
        }
        int mid = start + (end - start) / 2;
        if (arr[mid] == x) {
            return mid;
        } else if (arr[mid] > x) {
            return binary_search(arr, start, mid - 1, x);
        } else {
            return binary_search(arr, mid + 1, end, x);
        }
    }
    
    static int linear_search(int arr[], int size, int x) {
        for (int i = 0; i < size; i++) {
            if (arr[i] == x) {
                return i;
            }
        }
        return -1;
    }
    
    static int test(int size) {
        int *arr1 = new int[size];
        int *arr2 = new int[size];
    
        for (int i = 0; i < size; i++) {
            int j = rand();
            arr1[i] = j;
            arr2[i] = j;
        }
    
        Merge_sort(arr1, 0, size - 1);
    
        clock_t start1 = clock();
        int index1[4] = {};
        for (int i = 0; i < 100; i++) {
            index1[0] = binary_search(arr1, 0, size - 1, arr1[0]);
            index1[1] = binary_search(arr1, 0, size - 1, arr1[size / 2]);
            index1[2] = binary_search(arr1, 0, size - 1, arr1[size - 1]);
            index1[3] = binary_search(arr1, 0, size - 1, -1);
        }
        clock_t end1 = clock();
    
        clock_t start2 = clock();
        int index2[4] = {};
        for (int i = 0; i < 100; i++) {
            index2[0] = linear_search(arr2, size, arr1[0]);
            index2[1] = linear_search(arr2, size, arr1[size / 2]);
            index2[2] = linear_search(arr2, size, arr1[size - 1]);
            index2[3] = linear_search(arr2, size, -1);
        }
        clock_t end2 = clock();
    
        double time1 = (end1 - start1) * 1000.0 / CLOCKS_PER_SEC;
        double time2 = (end2 - start2) * 1000.0 / CLOCKS_PER_SEC;
    
        printf("size=%8d: binary searches: %8.3fms, %d %d %d %d\n",
               size, time1, index1[0], index1[1], index1[2], index1[3]);
        printf("size=%8d: linear searches: %8.3fms, %d %d %d %d\n",
               size, time2, index2[0], index2[1], index2[2], index2[3]);
    
        delete[] arr1;
        delete[] arr2;
        return 0;
    }
    
    int main() {
        for (int size = 1; size <= 10000000; size *= 10) {
            test(size);
        }
        return 0;
    }
    

    Output:

    size=       1: binary searches:    0.002ms, 0 0 0 -1
    size=       1: linear searches:    0.000ms, 0 0 0 -1
    size=       2: binary searches:    0.002ms, 0 1 1 -1
    size=       2: linear searches:    0.002ms, 0 1 1 -1
    size=       5: binary searches:    0.002ms, 0 2 4 -1
    size=       5: linear searches:    0.002ms, 3 0 4 -1
    size=      10: binary searches:    0.003ms, 0 5 9 -1
    size=      10: linear searches:    0.003ms, 9 7 1 -1
    size=      20: binary searches:    0.004ms, 0 10 19 -1
    size=      20: linear searches:    0.004ms, 15 18 5 -1
    size=      50: binary searches:    0.004ms, 0 25 49 -1
    size=      50: linear searches:    0.005ms, 44 24 0 -1
    size=     100: binary searches:    0.006ms, 0 50 99 -1
    size=     100: linear searches:    0.012ms, 34 91 0 -1
    size=     200: binary searches:    0.006ms, 0 100 199 -1
    size=     200: linear searches:    0.022ms, 62 123 58 -1
    size=     500: binary searches:    0.006ms, 0 250 499 -1
    size=     500: linear searches:    0.050ms, 47 389 252 -1
    size=    1000: binary searches:    0.006ms, 0 500 999 -1
    size=    1000: linear searches:    0.121ms, 902 808 422 -1
    size=    2000: binary searches:    0.008ms, 0 1000 1999 -1
    size=    2000: linear searches:    0.369ms, 733 1992 1866 -1
    size=    5000: binary searches:    0.010ms, 0 2500 4999 -1
    size=    5000: linear searches:    0.656ms, 2635 4605 1819 -1
    size=   10000: binary searches:    0.015ms, 0 5000 9999 -1
    size=   10000: linear searches:    1.137ms, 6722 8419 5128 -1
    size=   20000: binary searches:    0.012ms, 0 10000 19999 -1
    size=   20000: linear searches:    2.265ms, 16893 6637 10738 -1
    size=   50000: binary searches:    0.013ms, 0 25000 49999 -1
    size=   50000: linear searches:    4.538ms, 19705 40371 9661 -1
    size=  100000: binary searches:    0.014ms, 0 50000 99999 -1
    size=  100000: linear searches:    8.565ms, 42378 57447 33679 -1
    size=  200000: binary searches:    0.020ms, 0 100000 199999 -1
    size=  200000: linear searches:   26.003ms, 14037 198103 158988 -1
    size=  500000: binary searches:    0.016ms, 0 250000 499999 -1
    size=  500000: linear searches:   33.657ms, 162357 162899 18194 -1
    

    As you can see, binary search is much faster in the general case for large sizes, but linear search is much simpler and has faster or equivalent timings for up to 20 elements. A non recursive version of binary_search might lower the threshold a little bit. Note however that your implementation of binary_search does not always return the index of the first occurrence in case of duplicates.