I’m trying to implement mini-batch gradient descent on the popular iris dataset, but somehow I don’t manage to get the accuracy of the model above 75-80%. Also the loss does not decrease and is rather stuck at around 0.45, even when I set the number of iterations to 10000. Something im missing here ?
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.linear_stack = nn.Sequential(
nn.Linear(4,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,3),
)
def forward(self, x):
logits = self.linear_stack(x)
return logits
training loop, batchsize per epoch = 10. transform_label maps [0,1,2] to the labels.
lr = 0.01
model = NeuralNetwork()
optim = torch.optim.Adam(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()
n_iters = 1000
steps = n_iters/10
LOSS = []
for epochs in range(n_iters):
for i,(inputs, labels) in enumerate(train_loader):
out = model(inputs)
train_labels = transform_label(labels)
l = loss(out, train_labels)
l.backward()
#update weights
optim.step()
optim.zero_grad()
LOSS.append(l.item())
if epochs%steps == 0:
print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
#if i % 1 == 0:
#print(f" steps: {i+1}, loss : {l.item()}")
output:
epoch: 100/1000, loss: 1.0636296272277832
epoch: 400/1000, loss: 0.5142968013338076
epoch: 500/1000, loss: 0.49906910391073867
epoch: 900/1000, loss: 0.4586030915751588
epoch: 1000/1000, loss: 0.4543738731996598
Is it possible to calculate the loss like that or should I use torch.max()? If I do so I get this Error:
Expected floating point type for target with class probabilities, got Long
you didn't provide enough data and code to reproduce the problem. I wrote a complete and working code to train your model on the IRIS dataset.
Imports and Classes.
import torch
from torch import nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
class MyDataset(Dataset):
def __init__(self, X, Y):
assert len(X) == len(Y)
self.X = X
self.Y = Y
def __len__(self):
return len(self.X)
def __getitem__(self, item):
x = self.X[item]
y = self.Y[item]
return x, y
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.linear_stack = nn.Sequential(
nn.Linear(4,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,3),
)
def forward(self, x):
logits = self.linear_stack(x)
return logits
Read and Preprocess the data.
# Dataset was downloaded from https://archive.ics.uci.edu/ml/machine-learning-databases/iris/
df = pd.read_csv("iris.data", names=["x1", "x2", "x3", "x4", "label"])
X, Y = df[['x1', "x2", "x3", "x4"]], df['label']
# First, we transform the labels to numbers 0,1,2
encoder = LabelEncoder()
Y = encoder.fit_transform(Y)
# We split the dataset to train and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=123)
# Due to the nature of Neural Networks, we standardize the inputs to get better results
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
train_dataset = MyDataset(X_train, Y_train)
test_dataset = MyDataset(X_test, Y_test)
train_loader = DataLoader(train_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)
Train the model.
lr = 0.01
model = NeuralNetwork()
optim = torch.optim.Adam(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()
n_iters = 1000
steps = n_iters/10
LOSS = []
for epochs in range(n_iters):
for i,(inputs, labels) in enumerate(train_loader):
optim.zero_grad()
out = model(inputs.float())
l = loss(out, labels)
l.backward()
optim.step()
LOSS.append(l.item())
if epochs%steps == 0:
print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
Then, we need to run the model on test data to calculate the metrics.
preds = []
with torch.no_grad():
for i,(inputs, labels) in enumerate(test_loader):
out = model(inputs.float())
preds.extend(list(torch.argmax(out, axis=1).cpu().numpy()))
To get the metrics, you can use "classification_report".
print(classification_report(y_true=Y_test, y_pred=preds))
I hope my answer helps you.