import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
def cosine_similarity(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Return the cosine of the angle between *arr1* and *arr2*.

    The result is the dot product of the two arrays divided by the product
    of their norms. Because it only measures the angle between the vectors,
    two all-positive series with similar overall level score close to 1
    even when their point-to-point shapes differ.

    No guard is applied for zero-magnitude input: the division is performed
    as-is.
    """
    dot_product = np.dot(arr1, arr2)
    magnitude = norm(arr1) * norm(arr2)
    return dot_product / magnitude
def euclidean_distance(arr1: np.ndarray, arr2: np.ndarray) -> float:
    """Return a similarity score derived from the Euclidean distance.

    Despite the name, the value returned is ``1 / (1 + d)`` where ``d`` is
    the norm of ``arr1 - arr2``: identical arrays give 1.0 and the score
    decreases towards 0 as the arrays move apart.
    """
    distance = np.linalg.norm(arr1 - arr2)
    return 1 / (1 + distance)
# Three sample series; `black` is the reference the other two are scored against.
black = np.array([
    0.93036434, 0.80134155, 0.82428051, 0.88877041,
    0.90235719, 0.86631497, 0.82428051, 0.84878065,
    0.99113482, 0.81413637, 0.82428051, 0.80268685,
    0.76705671, 0.76605398, 0.82428051, 0.81137288,
    0.83886563, 0.80749507, 0.82428051,
])
blue = np.array([
    1., 0.75256457, 0.78572852, 0.84459419,
    0.88112504, 0.82160288, 0.78572852, 0.8022456,
    0.9949841, 0.78979966, 0.78572852, 0.76791598,
    0.70410357, 0.72986952, 0.78572852, 0.76683488,
    0.78731431, 0.77301876, 0.78572852,
])
green = np.array([
    1., 0.62172262, 0.60678783, 0.57714708,
    0.73848085, 0.69695676, 0.60678783, 0.58584646,
    0.60622072, 0.6202182, 0.60678783, 0.57949767,
    0.52131047, 0.5814518, 0.60678783, 0.5958478,
    0.62959938, 0.60829778, 0.60678783,
])

# Single-axes figure built through a one-row gridspec.
fig = plt.figure(figsize=(8, 4), dpi=80)
gs = fig.add_gridspec(1, hspace=0)
axs = gs.subplots()

# Score black against blue, then black against green, for each metric in turn.
for _label, _metric in (
    ("cosine_similarity = ", cosine_similarity),
    ("euclidean_distance = ", euclidean_distance),
):
    for _other in (blue, green):
        print(_label, _metric(black, _other))

# Draw each series in the colour it is named after.
for _series, _colour in ((black, 'black'), (blue, 'blue'), (green, 'green')):
    axs.plot(_series, color=_colour)

fig.tight_layout()
plt.show()
I'm trying to compute a similarity factor between two numpy arrays based on their shape rather than on the distance between them. Even though the blue and green curves are visually very different, the code prints almost the same cosine similarity for both when each is compared against the black curve:
cosine_similarity = 0.9993680126707705
cosine_similarity = 0.9914859250612972
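You can use numpy.corrcoef (the Pearson correlation coefficient). It subtracts each array's mean before normalizing, so it compares how the curves rise and fall rather than their overall level.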
For example:
import numpy as np
# np.corrcoef returns the 2x2 correlation matrix; element [0, 1] is the
# correlation between the two input arrays.
print(np.corrcoef(black, blue)[0, 1])  # Gives 0.96027588
print(np.corrcoef(black, green)[0, 1])  # Gives 0.52967232