python similarity

Similarity between two NumPy arrays based on shape rather than distance


import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm

def cosine_similarity(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Return the cosine of the angle between ``arr1`` and ``arr2``.

    The score is the dot product of the two inputs divided by the product
    of their Euclidean norms. Zero-norm input is not special-cased; the
    division is performed as-is.
    """
    numerator = np.dot(arr1, arr2)
    length1, length2 = norm(arr1), norm(arr2)
    return numerator / (length1 * length2)

def euclidean_distance(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Return a similarity score derived from the Euclidean distance.

    Despite the name, the value returned is ``1 / (1 + d)``, where ``d`` is
    the norm of ``arr1 - arr2``. Identical inputs therefore score 1.0, and
    the score shrinks toward 0 as the inputs move apart.
    """
    gap = np.linalg.norm(arr1 - arr2)
    return 1 / (1 + gap)

# Three sample series of 19 values each, named after the colour each one is
# drawn in.
black = np.array([
    0.93036434, 0.80134155, 0.82428051, 0.88877041, 0.90235719,
    0.86631497, 0.82428051, 0.84878065, 0.99113482, 0.81413637,
    0.82428051, 0.80268685, 0.76705671, 0.76605398, 0.82428051,
    0.81137288, 0.83886563, 0.80749507, 0.82428051,
])
blue = np.array([
    1.0, 0.75256457, 0.78572852, 0.84459419, 0.88112504,
    0.82160288, 0.78572852, 0.8022456, 0.9949841, 0.78979966,
    0.78572852, 0.76791598, 0.70410357, 0.72986952, 0.78572852,
    0.76683488, 0.78731431, 0.77301876, 0.78572852,
])
green = np.array([
    1.0, 0.62172262, 0.60678783, 0.57714708, 0.73848085,
    0.69695676, 0.60678783, 0.58584646, 0.60622072, 0.6202182,
    0.60678783, 0.57949767, 0.52131047, 0.5814518, 0.60678783,
    0.5958478, 0.62959938, 0.60829778, 0.60678783,
])

# One figure, 8x4 inches at 80 dpi, holding a single set of axes.
fig = plt.figure(figsize=(8, 4), dpi=80)
gs = fig.add_gridspec(1, hspace=0)
axs = gs.subplots()

# Score the black series against each of the other two: first with the
# cosine measure, then with the distance-based measure.
for other in (blue, green):
    print("cosine_similarity = ", cosine_similarity(black, other))

for other in (blue, green):
    print("euclidean_distance = ", euclidean_distance(black, other))

# Draw every series on the shared axes in its namesake colour.
for series, colour in ((black, 'black'), (blue, 'blue'), (green, 'green')):
    axs.plot(series, color=colour)

fig.tight_layout()
plt.show()

angular similarity

I'm trying to create a similarity factor between two numpy arrays based on shape rather than distance. Even though the blue and green curves are visually different in shape, the code prints almost the same factor for both.

cosine_similarity =  0.9993680126707705
cosine_similarity =  0.9914859250612972

Solution

  • You can use numpy.corrcoef

    For example:

    import numpy as np
    # np.corrcoef returns the correlation-coefficient matrix of its inputs;
    # entry [0, 1] is the coefficient between the first and second array.
    print(np.corrcoef(black, blue)[0,1]) # Gives 0.96027588
    print(np.corrcoef(black, green)[0,1]) # Gives 0.52967232