I'm trying to implement MI algorithm. here is my code:
import numpy as np
from copy import copy
from sklearn import metrics
from sklearn import preprocessing
from sklearn.datasets import load_digits
data, labels = load_digits(return_X_y=True) # labels is Y vector #data shape = (1797, 64), and labels shape is (1797,1)
(n_samples, n_features), n_digits = data.shape, np.unique(labels).size
#normalize the data and set into DataFrame
scaler = preprocessing.StandardScaler()
d = scaler.fit_transform(data)
now here my implementation for MI:
def mi_algo(_data, _labels, size, defualt_x = 17, defualt_y = 10):
theta = np.zeros(_data.shape[1])
x_len = y_len = _data.shape[0]
py = np.array([len(_labels[_labels==y_val])/y_len for y_val in range(defualt_y)]) #P(y)
for col in range(len(theta)):
temp = np.copy(_data[:,col])
px = np.array([len(temp[temp==x_val])/x_len for x_val in range(defualt_x)]) #P(x)
for x in range(defualt_x):
if px[x] == 0:
continue
for y in range(defualt_y):
if py[y] == 0:
continue
pxy = np.sum((temp == x) & (labels == y)) #P(x,y)
pxy = np.divide(pxy,x_len)
yx = np.multiply(px[x],py[y])
pxy = np.divide(pxy, yx)
log = np.log2(pxy)
theta[col] += np.multiply(pxy,log)
return theta
I used all the np because I got some error. here is the output:
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:23: RuntimeWarning: divide by zero encountered in log2
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:24: RuntimeWarning: invalid value encountered in multiply
[ 0. nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0. nan nan nan
nan nan nan 0. nan nan nan nan nan nan nan nan nan nan nan nan nan nan
nan nan nan nan nan nan nan nan nan nan]
now, I understand there is some divide by zero, but I can't figure it out. Thnx for you help!
slove it by check if the divisor is 0 -> continue.