I am creating a binary classifier based on the MNIST dataset using PyTorch. I want my classifier to classify between only 0s and 1s, however, when I train it, the error doesn't decrease and the loss becomes negative. Here's the error and loss at the first few iterations:
Train Err | Train Loss | Test Err | Test Loss |
---|---|---|---|
0.098717 | -138227.790601 | 0.098000 | -283448.859219 |
0.098717 | -415023.714219 | 0.098000 | -566828.664687 |
0.098717 | -691819.936146 | 0.098000 | -850208.527500 |
0.098717 | -968615.941562 | 0.098000 | -1133588.344375 |
0.098717 | -1245411.727292 | 0.098000 | -1416968.306900 |
0.098717 | -1522207.517708 | 0.098000 | -1700348.221250 |
I was obviously expecting better results.
Here is the code I am using:
# Loading the MNIST data reduced to the 0/1 examples
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
mnist_train = datasets.MNIST("./data", train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST("./data", train=False, download=True, transform=transforms.ToTensor())
train_idx = mnist_train.train_labels <= 1
try:
mnist_train.train_data = mnist_train.train_data[train_idx]
except AttributeError:
mnist_train._train_data = mnist_train.train_data[train_idx]
try:
mnist_train.train_labels = mnist_train.train_labels[train_idx]
except AttributeError:
mnist_train._train_labels = mnist_train.train_labels[train_idx]
test_idx = mnist_test.test_labels <= 1
try:
mnist_test.test_data = mnist_test.test_data[test_idx]
except AttributeError:
mnist_test._test_data = mnist_test.test_data[test_idx]
try:
mnist_test.test_labels = mnist_test.test_labels[test_idx]
except AttributeError:
mnist_test._test_labels = mnist_test.test_labels[test_idx]
train_loader = DataLoader(mnist_train, batch_size = 100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size = 100, shuffle=False)
# Creating a simple linear classifier
import torch
import torch.nn as nn
import torch.optim as optim
# do a single pass over the data
def epoch(loader, model, opt=None):
total_loss, total_err = 0.,0.
for X,y in loader:
yp = model(X.view(X.shape[0], -1))[:,0]
loss = nn.BCEWithLogitsLoss()(yp, y.float())
if opt:
opt.zero_grad()
loss.backward()
opt.step()
total_err += ((yp > 0) * (y==0) + (yp < 0) * (y==1)).sum().item()
total_loss += loss.item() * X.shape[0]
return total_err / len(loader.dataset), total_loss / len(loader.dataset)
model = nn.Linear(784, 1)
opt = optim.SGD(model.parameters(), lr=1)
print("Train Err", "Train Loss", "Test Err", "Test Loss", sep="\t")
for i in range(10):
train_err, train_loss = epoch(train_loader, model, opt)
test_err, test_loss = epoch(test_loader, model)
print(*("{:.6f}".format(i) for i in (train_err, train_loss, test_err, test_loss)), sep="\t")
I don't know why my error does not decrease nor why my loss keeps getting more negative. Does anyone spot the error?
I found the error. My initial code to select only 1s and 0s from the MNIST dataset didn't work. So obviously, applying BCELoss to a non-binary dataset was making the model fail.