Tags: python, pytorch, gradient, iris-dataset, mini-batch

Mini-batch gradient descent: loss doesn't improve and accuracy is very low


I'm trying to implement mini-batch gradient descent on the popular Iris dataset, but I can't get the accuracy of the model above 75-80%. The loss also doesn't decrease and is stuck at around 0.45, even when I set the number of iterations to 10000. Is there something I'm missing here?

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_stack = nn.Sequential(
            nn.Linear(4, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 3),
        )

    def forward(self, x):
        logits = self.linear_stack(x)
        return logits

Training loop, batch size = 10; transform_label maps the class labels to [0, 1, 2].

lr = 0.01
model = NeuralNetwork()
optim = torch.optim.Adam(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()

n_iters = 1000
steps = n_iters / 10
LOSS = []
for epochs in range(n_iters):
    for i, (inputs, labels) in enumerate(train_loader):
        out = model(inputs)
        train_labels = transform_label(labels)
        l = loss(out, train_labels)
        l.backward()
        # update weights
        optim.step()
        optim.zero_grad()
    LOSS.append(l.item())
    if epochs % steps == 0:
        print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
        # if i % 1 == 0:
        #     print(f" steps: {i+1}, loss : {l.item()}")

output:

epoch: 100/1000, loss: 1.0636296272277832

epoch: 400/1000, loss: 0.5142968013338076

epoch: 500/1000, loss: 0.49906910391073867

epoch: 900/1000, loss: 0.4586030915751588

epoch: 1000/1000, loss: 0.4543738731996598

Is it possible to calculate the loss like that, or should I use torch.max()? If I do, I get this error:

Expected floating point type for target with class probabilities, got Long
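
For context: nn.CrossEntropyLoss accepts two target formats, either Long class indices of shape (batch,) or float class probabilities of shape (batch, classes). The error above is raised when a target has the probability shape but the index dtype (e.g. a Long one-hot matrix). Also, torch.max(out, 1) returns a (values, indices) tuple; the indices are predicted classes, useful for computing accuracy but not for the loss call. A minimal sketch of the two accepted formats:

import torch
from torch import nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 3)                     # raw model output: (batch, classes)

# Format 1: class indices -- Long tensor of shape (batch,)
target_idx = torch.tensor([0, 2, 1, 0])
print(criterion(logits, target_idx))

# Format 2: class probabilities -- float tensor of shape (batch, classes)
target_prob = torch.tensor([[1.0, 0.0, 0.0],
                            [0.0, 0.0, 1.0],
                            [0.0, 1.0, 0.0],
                            [1.0, 0.0, 0.0]])
print(criterion(logits, target_prob))          # needs PyTorch >= 1.10

# A Long one-hot target mixes the two formats and raises:
# "Expected floating point type for target with class probabilities, got Long"
# criterion(logits, target_prob.long())

# torch.max returns a (values, indices) tuple; the indices are the predictions
values, pred_classes = torch.max(logits, dim=1)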


Solution

  • You didn't provide enough data and code to reproduce the problem, so I wrote a complete, working example that trains your model on the Iris dataset.

    Imports and Classes.

    import torch
    from torch import nn
    import pandas as pd
    from torch.utils.data import Dataset, DataLoader
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.metrics import classification_report
    
    class MyDataset(Dataset):
        def __init__(self, X, Y):
            assert len(X) == len(Y)
            self.X = X
            self.Y = Y
            
        def __len__(self):
            return len(self.X)
        
        def __getitem__(self, item):
            x = self.X[item]
            y = self.Y[item]
            return x, y
    
    class NeuralNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear_stack = nn.Sequential(
                nn.Linear(4, 128),
                nn.ReLU(),
                nn.Linear(128, 64),
                nn.ReLU(),
                nn.Linear(64, 3),
            )

        def forward(self, x):
            logits = self.linear_stack(x)
            return logits
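
    A quick sanity check (my addition, not part of the original answer): the network should map a batch of 4 features to 3 raw logits, which is exactly the input CrossEntropyLoss expects, so no softmax layer is needed.

    model = NeuralNetwork()
    dummy = torch.randn(8, 4)       # fake batch: 8 samples, 4 features
    print(model(dummy).shape)       # torch.Size([8, 3]) -- raw logits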
    

    Read and Preprocess the data.

    # Dataset was downloaded from https://archive.ics.uci.edu/ml/machine-learning-databases/iris/
    df = pd.read_csv("iris.data", names=["x1", "x2", "x3", "x4", "label"])
    X, Y = df[['x1', "x2", "x3", "x4"]], df['label']
    
    # First, we transform the labels to numbers 0,1,2
    encoder = LabelEncoder()
    Y = encoder.fit_transform(Y)
    
    # We split the dataset to train and test
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=123)
    
    # Standardize the features; neural networks generally train better on scaled inputs
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    train_dataset = MyDataset(X_train, Y_train)
    test_dataset = MyDataset(X_test, Y_test)
    
    train_loader = DataLoader(train_dataset, batch_size=8)
    test_loader = DataLoader(test_dataset, batch_size=8)    
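
    One detail worth flagging: StandardScaler returns float64 NumPy arrays, so the DataLoader collates them into double tensors; that is why the training loop below casts with inputs.float(). A minimal variation (my sketch, not part of the original answer) converts everything to float32/long tensors up front and also shuffles the training batches, which DataLoader does not do by default:

    X_train_t = torch.tensor(X_train, dtype=torch.float32)
    X_test_t = torch.tensor(X_test, dtype=torch.float32)
    Y_train_t = torch.tensor(Y_train, dtype=torch.long)   # CrossEntropyLoss expects long class indices
    Y_test_t = torch.tensor(Y_test, dtype=torch.long)

    train_loader = DataLoader(MyDataset(X_train_t, Y_train_t), batch_size=8, shuffle=True)
    test_loader = DataLoader(MyDataset(X_test_t, Y_test_t), batch_size=8)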
    

    Train the model.

    lr = 0.01
    model = NeuralNetwork()
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    loss = torch.nn.CrossEntropyLoss()
    
    n_iters = 1000
    steps = n_iters/10
    LOSS = []
    
    for epochs in range(n_iters):
        for i, (inputs, labels) in enumerate(train_loader):
            optim.zero_grad()
            out = model(inputs.float())   # inputs arrive as float64, so cast to float32
            l = loss(out, labels)
            l.backward()
            optim.step()
        LOSS.append(l.item())
        if epochs % steps == 0:
            print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
            
    

    output: [screenshot of the training loss printout]
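
    One quirk carried over from the question's loop: LOSS.append(l.item()) records only the last batch of each epoch, and the printout averages over all epochs so far, so the displayed value lags behind the current loss. A sketch (my variation, same training logic) that logs the mean loss per epoch instead:

    LOSS = []
    for epoch in range(n_iters):
        batch_losses = []
        for inputs, labels in train_loader:
            optim.zero_grad()
            out = model(inputs.float())
            l = loss(out, labels)
            l.backward()
            optim.step()
            batch_losses.append(l.item())
        LOSS.append(sum(batch_losses) / len(batch_losses))   # mean loss over this epoch
        if epoch % steps == 0:
            print(f"epoch: {epoch}/{n_iters}, loss: {LOSS[-1]:.4f}")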

    Then, we need to run the model on test data to calculate the metrics.

    model.eval()   # good practice before inference (no effect here, since there is no dropout/batchnorm)
    preds = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            out = model(inputs.float())
            preds.extend(torch.argmax(out, dim=1).cpu().numpy())
    

    To get the metrics, you can use "classification_report".

    print(classification_report(y_true=Y_test, y_pred=preds))
    

    output: [screenshot of the classification report]
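
    And to tie this back to the question: if you just want a single accuracy number to compare against the 75-80% mentioned there, a short sketch:

    from sklearn.metrics import accuracy_score

    print(accuracy_score(y_true=Y_test, y_pred=preds))   # fraction of correctly classified test samples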

    I hope my answer helps you.