I'm struggling to find an efficient way to implement this interpolated convex hull data treatment.
I have a 2D ndarray, call it arr
, with shape (2000000,19), containing floats.
I have a 1D ndarray, call it w
, with shape (19,), also containing floats.
What I achieved (and works perfectly except that it's excruciatingly slow) is the following:
import numpy as np
from scipy.interpolate import interp1d
# Sample data
arr = np.array([[49.38639913, 49.76769437, 49.66370476, 49.49905455, 49.15242251,
48.0518658 , 45.998071 , 45.31347273, 45.29614113, 45.25281212,
45.0448329 , 44.61154286, 43.72763117, 42.38443203, 41.17121991,
40.48662165, 40.35663463, 39.88001558, 39.55938095],
[47.97387359, 47.86121818, 47.69656797, 47.18528571, 46.70000087,
45.39146494, 43.50232035, 43.18168571, 43.82295498, 43.62364156,
43.31167273, 42.88704848, 42.37576623, 41.0585645 , 40.37396623,
39.09142771, 38.79679048, 38.51948485, 38.52815065]])
w = np.array([2.1017, 2.1197, 2.1374, 2.1548, 2.172 , 2.1893, 2.2068, 2.2254,
2.2417, 2.2592, 2.2756, 2.2928, 2.3097, 2.326 , 2.3421, 2.3588,
2.3745, 2.3903, 2.4064])
def upper_andrews_hull(points: np.ndarray):
"""
Computes the upper half of the convex hull of a set of 2D points.
:param points: an iterable sequence of (x, y) pairs representing the points.
"""
# 2D cross product of OA and OB vectors, i.e. z-component of their 3D cross product.
# Returns a positive value, if OAB makes a counter-clockwise turn,
# negative for clockwise turn, and zero if the points are collinear.
def cross(o, a, b):
return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])
# Reverse the points so that we can pop from the end
points = np.flip(points, axis=0)
# Build upper hull
upper = []
for p in points:
while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
upper.pop()
upper.append(p)
# Reverse the upper hull
upper = np.flip(np.array(upper), axis=0)
return upper
result = np.empty(arr.shape)
for i in range(arr.shape[0]):
# Create points, using w as x values, and arr as y values
points = np.stack((w, arr[i,:]), axis=1)
# Calculate the convex hull around the points
hull = upper_andrews_hull(points)
# Interpolate the hull
interp_function = interp1d(*hull.T)
# Store interpolation's result to have the same x references as original points
result[i,:] = interp_function(w)
I'm pretty sure that there's a way to forego the loop and only use vectorial calculations, but I can't find it (plus, there's the issue that hull
doesn't always have the same number of points, so all of the hulls would not be storable in an ndarray.
My expected behaviour would be something like result = interpolated_hull(arr, w, axis=0)
, to apply the operations on the entire arr
array without a loop.
What about using numba
in your code? This gives me a speed improvement on my machine by a factor of:
loop_hulls_numba
: ~70 (single core) or 70s to 0.973sloop_hulls_numba_parallel
: ~350 (multiple cores) or 70s to 0.223s (depends strongly on the number of CPUs)based on a 2'000'000 by 16 array!
Speed tests are performed with timeit
, i.e. %timeit -n 3 -r 3 loop_hulls_numba(w, arr_n)
. numba
needs some extra time for the first compilation, which I excluded in the speed test.
n = int(2e6)
arr_n = np.resize(arr, (n, 19))
@jit(nopython=True)
def cross_numba(o, a, b):
return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])
@jit(nopython=True)
def upper_andrews_hull_numba(x: np.ndarray, y: np.ndarray):
"""
Computes the upper half of the convex hull of a set of 2D points.
:param points: an iterable sequence of (x, y) pairs representing the points.
"""
# 2D cross product of OA and OB vectors, i.e. z-component of their 3D cross product.
# Returns a positive value, if OAB makes a counter-clockwise turn,
# negative for clockwise turn, and zero if the points are collinear.
# Build upper hull
upper = [(x[-1], y[-1]),
(x[-2], y[-2])]
# Reverse the points so that we can pop from the end
for p in zip(x[-3::-1], y[-3::-1]):
while len(upper) >= 2 and cross_numba(upper[-2], upper[-1], p) <= 0:
upper.pop()
upper.append(p)
# Reverse the upper hull
return upper[::-1]
@jit(nopython=True)
def loop_hulls_numba(w, arr):
result = np.zeros(arr.shape)
for i in range(arr.shape[0]):
# Calculate the convex hull around the points
hull = np.array(upper_andrews_hull_numba(w, arr[i,:]))
# # Interpolate the hull
x, y = hull.T
result[i,:] = np.interp(w, x, y)
return result
@jit(nopython=True, parallel=True)
def loop_hulls_numba_parallel(w, arr):
result = np.zeros(arr.shape)
for i in prange(arr.shape[0]):
# Calculate the convex hull around the points
hull = np.array(upper_andrews_hull_numba(w, arr[i,:]))
# # Interpolate the hull
x, y = hull.T
result[i] = np.interp(w, x, y)
return result