I want to get through Fashion_Mnist data, I would like to see the output gradient which might be mean squared sum between first and second layer
My code first below
#import the nescessary libs
import numpy as np
import torch
import time
# Loading the Fashion-MNIST dataset
from torchvision import datasets, transforms
# Get GPU Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.get_device_name(0)
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
# Download and load the training data
trainset = datasets.FashionMNIST('MNIST_data/', download = True, train = True, transform = transform)
testset = datasets.FashionMNIST('MNIST_data/', download = True, train = False, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 128, shuffle = True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size = 128, shuffle = True, num_workers=4)
# Examine a sample
dataiter = iter(trainloader)
images, labels = dataiter.next()
# Define the network architecture
from torch import nn, optim
import torch.nn.functional as F
model = nn.Sequential(nn.Linear(784, 128),
nn.ReLU(),
nn.Linear(128, 10),
nn.LogSoftmax(dim = 1)
)
model.to(device)
# Define the loss
criterion = nn.MSELoss()
# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr = 0.001)
# Define the epochs
epochs = 5
train_losses, test_losses = [], []
squared_sum = []
# start = time.time()
for e in range(epochs):
running_loss = 0
for images, labels in trainloader:
# Flatten Fashion-MNIST images into a 784 long vector
images = images.to(device)
labels = labels.to(device)
images = images.view(images.shape[0], -1)
optimizer.zero_grad()
output = model[0].forward(images)
loss = criterion(output[0], labels.float())
loss.backward()
optimizer.step()
running_loss += loss.item()
else:
print(running_loss)
test_loss = 0
accuracy = 0
# Turn off gradients for validation, saves memory and computation
with torch.no_grad():
# Set the model to evaluation mode
model.eval()
# Validation pass
for images, labels in testloader:
images = images.to(device)
labels = labels.to(device)
images = images.view(images.shape[0], -1)
ps = model(images[0])
test_loss += criterion(ps, labels)
top_p, top_class = ps.topk(1, dim = 1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor))
model.train()
print("Epoch: {}/{}..".format(e+1, epochs),
"Training loss: {:.3f}..".format(running_loss/len(trainloader)),
"Test loss: {:.3f}..".format(test_loss/len(testloader)),
"Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
What I want to get,
for e in range(epochs):
running_loss = 0
for images, labels in trainloader:
# Flatten Fashion-MNIST images into a 784 long vector
images = images.to(device)
labels = labels.to(device)
images = images.view(images.shape[0], -1)
optimizer.zero_grad()
output = model[0].forward(images)
loss = criterion(output[0], labels.float())
loss.backward()
optimizer.step()
running_loss += loss.item()
In here, model[0] (This might be the first layer nn.Linear(784, 128)), I would love to get the mean square errors only for first and second layer,
If I run this code, I receive this error below
RuntimeError: The size of tensor a (128) must match the size of tensor b (96) at non-singleton dimension 0
If I want to run this code correctly to get the MSELoss, what I need to do?
The error is caused by the number of samples in the dataset and the batch size.
In more detail, the training MNIST dataset includes 60,000 samples, your current batch_size
is 128 and you will need 60000/128=468.75
loops to finish training on one epoch. So the problem comes from here, for 468 loops, your data will have 128 samples but the last loop just contains 60000 - 468*128 = 96
samples.
To solve this problem, I think you need to find the suitable batch_size
and the number of neural in your model as well.
I think it should work for computing loss
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 96, shuffle = True, num_workers=0)
testloader = torch.utils.data.DataLoader(testset, batch_size = 96, shuffle = True, num_workers=0)
model = nn.Sequential(nn.Linear(784, 96),
nn.ReLU(),
nn.Linear(96, 10),
nn.LogSoftmax(dim = 1)
)