pythonnumpyvectorizationconvex-hull

Vectorize a Convex Hull and Interpolation loop on a specific axis of an ndarray


I'm struggling to find an efficient way to implement this interpolated convex hull data treatment.

I have a 2D ndarray, call it arr, with shape (2000000,19), containing floats. I have a 1D ndarray, call it w, with shape (19,), also containing floats.

What I achieved (and works perfectly except that it's excruciatingly slow) is the following:

import numpy as np
from scipy.interpolate import interp1d

# Sample data
arr = np.array([[49.38639913, 49.76769437, 49.66370476, 49.49905455, 49.15242251,
        48.0518658 , 45.998071  , 45.31347273, 45.29614113, 45.25281212,
        45.0448329 , 44.61154286, 43.72763117, 42.38443203, 41.17121991,
        40.48662165, 40.35663463, 39.88001558, 39.55938095],
       [47.97387359, 47.86121818, 47.69656797, 47.18528571, 46.70000087,
        45.39146494, 43.50232035, 43.18168571, 43.82295498, 43.62364156,
        43.31167273, 42.88704848, 42.37576623, 41.0585645 , 40.37396623,
        39.09142771, 38.79679048, 38.51948485, 38.52815065]])
w = np.array([2.1017, 2.1197, 2.1374, 2.1548, 2.172 , 2.1893, 2.2068, 2.2254,
       2.2417, 2.2592, 2.2756, 2.2928, 2.3097, 2.326 , 2.3421, 2.3588,
       2.3745, 2.3903, 2.4064])
def upper_andrews_hull(points: np.ndarray):
    """
    Computes the upper half of the convex hull of a set of 2D points.
    :param points: an iterable sequence of (x, y) pairs representing the points.
    """
    # 2D cross product of OA and OB vectors, i.e. z-component of their 3D cross product.
    # Returns a positive value, if OAB makes a counter-clockwise turn,
    # negative for clockwise turn, and zero if the points are collinear.
    def cross(o, a, b):
        return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])

    # Reverse the points so that we can pop from the end
    points = np.flip(points, axis=0)
    # Build upper hull
    upper = []
    for p in points:
        while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
            upper.pop()
        upper.append(p)
    # Reverse the upper hull
    upper = np.flip(np.array(upper), axis=0)

    return upper
result = np.empty(arr.shape)
for i in range(arr.shape[0]):
    # Create points, using w as x values, and arr as y values
    points = np.stack((w, arr[i,:]), axis=1)
    # Calculate the convex hull around the points
    hull = upper_andrews_hull(points)
    # Interpolate the hull
    interp_function = interp1d(*hull.T)
    # Store interpolation's result to have the same x references as original points
    result[i,:] = interp_function(w)

I'm pretty sure that there's a way to forego the loop and only use vectorial calculations, but I can't find it (plus, there's the issue that hull doesn't always have the same number of points, so all of the hulls would not be storable in an ndarray.

My expected behaviour would be something like result = interpolated_hull(arr, w, axis=0), to apply the operations on the entire arr array without a loop.


Solution

  • What about using numba in your code? This gives me a speed improvement on my machine by a factor of:

    based on a 2'000'000 by 16 array!

    Speed tests are performed with timeit, i.e. %timeit -n 3 -r 3 loop_hulls_numba(w, arr_n). numba needs some extra time for the first compilation, which I excluded in the speed test.

    Generate a bigger array for testing

    n = int(2e6)
    arr_n = np.resize(arr, (n, 19))
    

    The numba solution

    @jit(nopython=True)
    def cross_numba(o, a, b):
        return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])
    
    @jit(nopython=True)
    def upper_andrews_hull_numba(x: np.ndarray, y: np.ndarray):
        """
        Computes the upper half of the convex hull of a set of 2D points.
        :param points: an iterable sequence of (x, y) pairs representing the points.
        """
        # 2D cross product of OA and OB vectors, i.e. z-component of their 3D cross product.
        # Returns a positive value, if OAB makes a counter-clockwise turn,
        # negative for clockwise turn, and zero if the points are collinear.
        
        # Build upper hull
        upper = [(x[-1], y[-1]), 
                 (x[-2], y[-2])]
        # Reverse the points so that we can pop from the end
        for p in zip(x[-3::-1], y[-3::-1]):
            while len(upper) >= 2 and cross_numba(upper[-2], upper[-1], p) <= 0:
                upper.pop()
            upper.append(p)
    
        # Reverse the upper hull
        return upper[::-1]
    
    @jit(nopython=True)
    def loop_hulls_numba(w, arr):
        result = np.zeros(arr.shape)
        for i in range(arr.shape[0]):
            # Calculate the convex hull around the points
            hull = np.array(upper_andrews_hull_numba(w, arr[i,:]))
            # # Interpolate the hull
            x, y = hull.T
            result[i,:] = np.interp(w, x, y)
        return result
    

    Get the maximum out of your CPU by parallelization

    @jit(nopython=True, parallel=True)
    def loop_hulls_numba_parallel(w, arr):
        result = np.zeros(arr.shape)
        for i in prange(arr.shape[0]):
            # Calculate the convex hull around the points
            hull = np.array(upper_andrews_hull_numba(w, arr[i,:]))
            # # Interpolate the hull
            x, y = hull.T
            result[i] = np.interp(w, x, y)
        return result