I'm trying to implement multiclass classification with logistic regression on an Iris.csv dataset from Kaggle. This is my code.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def standardize(X_tr): # (x-Mean(x))/std(X) Normalizes data
for i in range(X_tr.shape[1]):
X_tr[:, i] = (X_tr[:, i] - np.mean(X_tr[:, i])) / np.std(X_tr[:, i])
return X_tr
def sigmoid(z): #Sigmoid/Logistic function
sig = 1 / (1 + np.exp(-z))
return sig
def cost(theta, X, y):
z = np.dot(X, theta)
cost0 = y.T.dot(np.log(sigmoid(z)))
cost1 = (1 - y).T.dot(np.log(1 - sigmoid(z)))
cost = -((cost1 + cost0)) / len(y)
return cost
def initialize(X): #Initializing X feature matrix and Theta vector
thetas = np.zeros((X.shape[1] + 1, len(np.unique(y))))
X = np.c_[np.ones((X.shape[0], 1)), X] # adding 691 rows of ones as the first column in X
return thetas, X
def fit(X, y, alpha=0.01, iterations=1000): # Gradient Descent
thetas_list = []
X = np.c_[np.ones((X.shape[0], 1)), X]
for i in range(len(np.unique(y))):
y_one_vs_all = np.where(y == np.unique(y)[i], 1, 0)
thetas, _ = initialize(X)
for j in range(iterations):
z = np.dot(X, thetas[:, i])
h = sigmoid(z)
gradient = np.dot(X.T, (h - y_one_vs_all)) / len(y)
thetas[:, i] -= alpha * gradient
thetas_list.append(thetas[:, i])
global gthetas
gthetas = thetas_list
return None
def predict(X):
X = np.c_[np.ones((X.shape[0], 1)), X]
predictions = []
for sample in X:
probs = []
for thetas in gthetas:
z = np.dot(sample, thetas)
probs.append(sigmoid(z))
predictions.append(np.argmax(probs) + 1)
return predictions
# load data
df = pd.read_csv("Iris.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
# convert class categorical values to numerical values
df['Species'].replace('Iris-setosa', 1, inplace=True)
df['Species'].replace('Iris-versicolor', 2, inplace=True)
df['Species'].replace('Iris-virginica', 3, inplace=True)
# prepare one-vs-all labels for multiclass classification
y1 = pd.DataFrame(np.zeros((len(y), len(np.unique(y)))))
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1
else:
y1.iloc[j, i] = 0
# split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=0)
# standardize features
X_train = standardize(X_train)
X_test = standardize(X_test)
# fit logistic regression model
fit(X_train, y_train, alpha=0.01, iterations=400)
# make predictions on test set
predictions = predict(X_test)
print(predictions)
and below is the error I'm getting.
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14368/3997569506.py in <module>
1 # standardize features
2 X_train = standardize(X_train)
----> 3 thetas_list = fit(X_train, y_train)
4 plt.scatter(range(len(cost_list)), cost_list, c="blue")
5 plt.show()
~\AppData\Local\Temp/ipykernel_14368/3827160719.py in fit(X, y, alpha, iter)
6 thetas, _ = initialize(X)
7 for j in range(iter):
----> 8 z = dot(X, thetas[:, i])
9 h = sigmoid(z)
10 gradient = dot(X.T, (h - y_one_vs_all)) / len(y)
<__array_function__ internals> in dot(*args, **kwargs)
ValueError: shapes (120,6) and (7,) not aligned: 6 (dim 1) != 7 (dim 0)
Any help in fixing this error will be appreciated. I've looked into other answers on stack overflow and I still can't figure this out.
What I'm trying to accomplish: The following code is used as a base to create columns in y (outcome, Species class) for each class. So, the number of columns in y1 (given below) will be equal to the total number classes. That is 3 for iris flower dataset. For instance, for the first column (class is 1 which is Iris-setosa), whichever row in dataset is ‘Iris-setosa’ will be marked as 1 in y1’s corresponding row. Any other class (classes 2 and 3: Iris-versicolor and Iris-virginica) will be marked as 0 in y1’s first column.
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1 #one vs. all
else:
y1.iloc[j, i] = 0 #all others will be 0
You should check how the theta array is built. You are initializing it with in the initialize
function with the shape (X.shape[1] + 1, nrows)
. The errors tells you that you can not calculate the dot product between an array X
with shape (a, b)
and theta with shape (b+1, c)
. You can try to remove the +1
in the theta definition. Then you will find another problem regarding the gradient, where you are trying to calculate again a dot product between two incompatible arrays. I hope this helps you, good luck!