I found this gist, which uses Numba for fast computation of cosine similarity:
import numba
import numpy as np
@numba.jit(nopython=True)
def fast_cosine(u, v):
    """Return the cosine similarity of two 1-D arrays, skipping NaN positions.

    Any index at which either ``u`` or ``v`` is NaN is left out of the dot
    product and of both norms, so the result is the cosine similarity of the
    two vectors restricted to the entries where both are observed.

    Parameters
    ----------
    u, v : 1-D float arrays of equal length.

    Returns
    -------
    float
        ``u . v / (|u| * |v|)`` over the jointly non-NaN entries, or ``1.0``
        when either restricted norm is zero (this includes empty input and
        input with no jointly non-NaN entry).

    Raises
    ------
    ValueError
        If ``u`` and ``v`` differ in length.
    """
    m = u.shape[0]
    # The loop below indexes v with indices derived from u; without this guard
    # a shorter v would be read out of bounds in compiled code.
    if v.shape[0] != m:
        raise ValueError("u and v must have the same length")

    # Float literals keep every accumulator a float from its first binding.
    udotv = 0.0
    u_sq = 0.0
    v_sq = 0.0
    for i in range(m):
        ui = u[i]
        vi = v[i]
        if np.isnan(ui) or np.isnan(vi):
            continue
        udotv += ui * vi
        u_sq += ui * ui
        v_sq += vi * vi

    u_norm = np.sqrt(u_sq)
    v_norm = np.sqrt(v_sq)
    # Convention kept from the original: a zero-length vector yields 1.0
    # instead of dividing by zero.
    if u_norm == 0 or v_norm == 0:
        return 1.0
    return udotv / (u_norm * v_norm)
Results look promising (500 ns with the jit decorator vs. 200 µs without it on my machine).
I would like to use Numba to parallelize this computation between a vector u
and a candidate matrix M,
i.e. compute the cosine similarity between u and each row of M.
Example:
def fast_cosine_matrix(u, M):
    """
    Return array of cosine similarity between u and rows in M

    For each row, positions where either ``u`` or the row is NaN are left
    out of the dot product and of both norms. A row for which either
    restricted norm is zero gets the value 1.0.

    The computation uses whole-array NumPy operations, so it allocates
    temporaries with the same shape as ``M``.

    Parameters
    ----------
    u : 1-D array-like of length n.
    M : 2-D array-like of shape (rows, n).

    Returns
    -------
    numpy.ndarray
        1-D float64 array of length ``rows``.

    Raises
    ------
    ValueError
        If ``u`` is not 1-D, ``M`` is not 2-D, or the row length of ``M``
        differs from the length of ``u``.

    >>> import numpy as np
    >>> u = np.array([1.0, 0.0])
    >>> M = np.array([[1.0, 0.0], [0.0, 1.0]])
    >>> fast_cosine_matrix(u, M)
    array([1., 0.])
    """
    u = np.asarray(u, dtype=np.float64)
    M = np.asarray(M, dtype=np.float64)
    if u.ndim != 1 or M.ndim != 2 or M.shape[1] != u.shape[0]:
        raise ValueError("expected u of shape (n,) and M of shape (rows, n)")

    # A position counts for a row only when both u and that row are non-NaN.
    valid = ~(np.isnan(u)[np.newaxis, :] | np.isnan(M))
    # Zeroing the excluded positions removes them from every sum below.
    u_rows = np.where(valid, u, 0.0)
    m_rows = np.where(valid, M, 0.0)

    dots = (u_rows * m_rows).sum(axis=1)
    u_norms = np.sqrt((u_rows * u_rows).sum(axis=1))
    m_norms = np.sqrt((m_rows * m_rows).sum(axis=1))

    # Rows with a zero norm keep the default 1.0 rather than dividing by zero.
    out = np.ones(M.shape[0], dtype=np.float64)
    ok = (u_norms != 0) & (m_norms != 0)
    out[ok] = dots[ok] / (u_norms[ok] * m_norms[ok])
    return out
One way is to simply rewrite the function so that its second input is a matrix. But I get a NotImplementedError
if I try to iterate over the rows of a matrix. I am going to try using slices instead.
I thought about using vectorize,
but I can't get it to work.
Alternative: make a generalized ufunc (gufunc) with Numba:
@numba.guvectorize(["void(float64[:], float64[:], float64[:])"], "(n),(n)->()", target='parallel')
def fast_cosine_gufunc(u, v, result):
    """Generalized-ufunc kernel: NaN-skipping cosine similarity of u and v.

    The layout ``(n),(n)->()`` pairs two length-n float64 vectors and yields
    one scalar per pair, which is written into ``result``. Positions where
    either input is NaN are excluded from the dot product and from both
    norms. When either norm comes out as zero the stored value is 1.0.
    """
    dot = 0.0
    sq_u = 0.0
    sq_v = 0.0
    for k in range(u.shape[0]):
        a = u[k]
        b = v[k]
        # Accumulate only where both entries are present.
        if not (np.isnan(a) or np.isnan(b)):
            dot += a * b
            sq_u += a * a
            sq_v += b * b

    len_u = np.sqrt(sq_u)
    len_v = np.sqrt(sq_v)
    if (len_u != 0) and (len_v != 0):
        result[:] = dot / (len_u * len_v)
    else:
        # Degenerate vector: store the fallback value instead of dividing by zero.
        result[:] = 1.0
# Demo inputs: one random query vector of length 100 and a random candidate
# matrix with 100000 rows of the same length.
u = np.random.rand(100)
M = np.random.rand(100000, 100)
# Vector vs. a single row of M: one (n),(n) pair.
fast_cosine_gufunc(u, M[0,:])
# Vector vs. the whole matrix: with the (n),(n)->() layout, u is expected to
# be broadcast against every row of M, giving one similarity per row.
fast_cosine_gufunc(u, M)