pythonnumpyscipyinterpolationndimage

How to accelerate scipy.map_coordinates for multiple interpolations?


I have several values f, g, h that are defined on the same regular grid (x, y, z) that I want to interpolate onto a new grid (x1, y1, z1). i.e., I have f(x, y, z) , g(x, y, z) , h(x, y, z) and I want to calculate f(x1, y1, z1), g(x1, y1, z1), h(x1, y1, z1).

I am using scipy.map_coordinates at the moment. However, each interpolation is done separately and the number of points are around 4,000,000, so it is quite slow

from scipy.ndimage import map_coordinates
import numpy as np

# examples of f, g, h
f=np.random.randn(100,50,50)
g=np.random.randn(100,50,50)
h=np.random.randn(100,50,50)

# examples of x1, y1, z1
x1=np.random.rand(4000000)*100
y1=np.random.rand(4000000)*50
z1=np.random.rand(4000000)*50

# my solution at the moment
coords=np.array([x1,y1,z1])

out = np.zeros((3, coords.shape[1]))
out[0]= map_coordinates(f, coords, order=1)
out[1]= map_coordinates(g, coords, order=1)
out[2]= map_coordinates(h, coords, order=1)

Is there a way to accelerate the calculation?


Solution

  • This is just a short comment on @Han-Kwang Nienhuys answer. The main thing to improve here is to avoid vectorized commands, which can lead to a quite high performance degradation.

    Generally it would be a good idea to change the array shapes of input and output (n,3) instead (3,n) if you use default C-ordered arrays.

    Input

    import numpy as np
    import numba as nb
    from scipy.ndimage import map_coordinates
    
    # examples of f, g, h
    f=np.random.randn(100,50,50)
    g=np.random.randn(100,50,50)
    h=np.random.randn(100,50,50)
    
    n=4_000_000
    # examples of x1, y1, z1
    x1=np.random.rand(n)*99
    y1=np.random.rand(n)*49
    z1=np.random.rand(n)*49
    
    coords=np.array((x1,y1,z1))
    fgh = np.array([f, g, h]).T.copy().T # optimize memory layout
    

    Code

    #from Han-Kwang Nienhuys
    @nb.njit(fastmath=True)
    def mymap(ars, coords):
        """ars is input arrays, shape (m, nx, ny, nz)
        coords is coordinate array, float, shape (3, n)
        """
        # these have shape (n, 3)
        ijk = coords.T.astype(np.int16)
        fijk = (coords.T - ijk).astype(np.float32)
        n = ijk.shape[0]
        m = ars.shape[0]
        out = np.empty((n, m), dtype=np.float64)
    
        for l in range(n):
            i0, j0, k0 = ijk[l, :3]
            # Note: don't write i1, j1, k1 = ijk[l, :3]+1 -- much slower.
            i1, j1, k1 = i0+1, j0+1, k0+1
            fi1, fj1, fk1 = fijk[l, :3]
            fi0, fj0, fk0 = 1-fi1, 1-fj1, 1-fk1
            out[l, :] = (
                fi0 * fj0 * fk0 * ars[:, i0, j0, k0] +
                fi0 * fj0 * fk1 * ars[:, i0, j0, k1] +
                fi0 * fj1 * fk0 * ars[:, i0, j1, k0] +
                fi0 * fj1 * fk1 * ars[:, i0, j1, k1] +
                fi1 * fj0 * fk0 * ars[:, i1, j0, k0] +
                fi1 * fj0 * fk1 * ars[:, i1, j0, k1] +
                fi1 * fj1 * fk0 * ars[:, i1, j1, k0] +
                fi1 * fj1 * fk1 * ars[:, i1, j1, k1]
                )
        return out.T
    
    #optimized version
    @nb.njit(fastmath=True,parallel=False)
    def mymap_opt(ars, coords):
        """ars is input arrays, shape (m, nx, ny, nz)
        coords is coordinate array, float, shape (3, n)
        """
        # these have shape (n, 3)
        ijk = coords.T.astype(np.int16)
        fijk = (coords.T - ijk).astype(np.float32)
        n = ijk.shape[0]
        m = ars.shape[0]
        out = np.empty((n, m), dtype=np.float64)
    
        for l in nb.prange(n):
            i0= ijk[l, 0]
            j0= ijk[l, 1]
            k0 =ijk[l, 2]
            # Note: don't write i1, j1, k1 = ijk[l, :3]+1 -- much slower.
            i1, j1, k1 = i0+1, j0+1, k0+1
            fi1=  fijk[l, 0] 
            fj1=  fijk[l, 1] 
            fk1 = fijk[l, 2]
    
            fi0, fj0, fk0 = 1-fi1, 1-fj1, 1-fk1
            for i in range(ars.shape[0]):
                out[l, i] = (
                    fi0 * fj0 * fk0 * ars[i, i0, j0, k0] +
                    fi0 * fj0 * fk1 * ars[i, i0, j0, k1] +
                    fi0 * fj1 * fk0 * ars[i, i0, j1, k0] +
                    fi0 * fj1 * fk1 * ars[i, i0, j1, k1] +
                    fi1 * fj0 * fk0 * ars[i, i1, j0, k0] +
                    fi1 * fj0 * fk1 * ars[i, i1, j0, k1] +
                    fi1 * fj1 * fk0 * ars[i, i1, j1, k0] +
                    fi1 * fj1 * fk1 * ars[i, i1, j1, k1]
                    )
        return out.T
    

    Timings

    out_1 = mymap(fgh, coords)
    out_2 = mymap_opt(fgh, coords)
    print(np.allclose(out_1,out_2))
    #True
    
    %timeit out = mymap(fgh, coords)
    #1.09 s ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    %timeit out = mymap_opt(fgh, coords)
    #parallel=True
    #144 ms ± 5.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    #parallel=False
    #259 ms ± 4.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)