I'm running PyTorch model training, where I defined the __getitem__ method of the dataset to return:
def __getitem__(self, ind):
    [...]
    return processed_images, target
processed_images is a sequence of 5 RGB images of size 224x224, while target is a 4-dimensional vector with the one-hot encoding of the class targets.
So each call to __getitem__ returns, for example:
>> processed_images.shape
(5, 224, 224, 3)
>> target
[0.0, 1.0, 0.0, 0.0]
In the training script, I'm extracting batches using:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    persistent_workers=False,
    timeout=0,
)

for epoch in range(number_of_epochs):
    for batch_ind, batch_data in enumerate(train_dataloader):
        datas, targets = batch_data
The problem is that datas has the correct shape, i.e. a stack of 22 sequences of images:
datas.shape
torch.Size([22, 5, 224, 224, 3])
However, targets are stacked in a weird way:
len(targets) = 4
len(targets[0]) = 22
while I would expect the opposite (a list of 22 elements, each of len=4). Am I doing something wrong?
I have recreated a minimal working code snippet from your description below (with a batch size of 6 for clearer output).
import torch
from torch.utils.data import Dataset


class CustomImageDataset(Dataset):
    def __init__(self):
        self.name = "test"

    def __len__(self):
        return 100

    def __getitem__(self, idx):
        label = [0, 1.0, 0, 0]
        image = torch.randn((5, 3, 224, 224), dtype=torch.float32)
        return image, label


train_dataset = CustomImageDataset()
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=6,
    shuffle=True,
    drop_last=False,
    persistent_workers=False,
    timeout=0,
)

for idx, data in enumerate(train_dataloader):
    datas = data[0]
    labels = data[1]
    print("Datas shape:", datas.shape)
    print("Labels:", labels)
    print("Labels shape:", len(labels))
    print("Labels[0] shape:", len(labels[0]))
    break
This code yields the following output:
Datas shape: torch.Size([6, 5, 3, 224, 224])
Labels: [tensor([0, 0, 0, 0, 0, 0]), tensor([1., 1., 1., 1., 1., 1.], dtype=torch.float64), tensor([0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0])]
Labels shape: 4
Labels[0] shape: 6
As you can clearly see when inspecting labels, your list gets collated elementwise into tensors: all 0th elements of your one-hot-encoded lists end up in the same tensor (the 0th tensor in the labels list), and likewise for every other position.
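To make that collation behaviour concrete, here is a minimal sketch of what the DataLoader's default collate function does to list labels versus tensor labels (assuming a PyTorch version, roughly 1.11 or newer, that exposes default_collate under torch.utils.data):

import torch
from torch.utils.data import default_collate

# A batch of three samples whose labels are plain Python lists, as above.
list_labels = [[0, 1.0, 0, 0], [0, 1.0, 0, 0], [0, 1.0, 0, 0]]
print(default_collate(list_labels))
# [tensor([0, 0, 0]), tensor([1., 1., 1.], dtype=torch.float64),
#  tensor([0, 0, 0]), tensor([0, 0, 0])]
# The lists are zipped position by position: one tensor per label position.

# The same labels as tensors are stacked along a new batch dimension instead.
tensor_labels = [torch.tensor([0, 1.0, 0, 0]) for _ in range(3)]
print(default_collate(tensor_labels))
# tensor([[0., 1., 0., 0.],
#         [0., 1., 0., 0.],
#         [0., 1., 0., 0.]])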
As this is not the outcome you expect (you want a list of 22 entries, one one-hot encoding per sample), the easiest fix is to define your label directly as torch.tensor([0, 1.0, 0, 0]), as suggested in the comment by SenseiH.
The adapted snippet would look like this:
import torch
from torch.utils.data import Dataset


class CustomImageDataset(Dataset):
    def __init__(self):
        self.name = "test"

    def __len__(self):
        return 100

    def __getitem__(self, idx):
        label = torch.tensor([0, 1.0, 0, 0])
        image = torch.randn((5, 3, 224, 224), dtype=torch.float32)
        return image, label


train_dataset = CustomImageDataset()
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=6,
    shuffle=True,
    drop_last=False,
    persistent_workers=False,
    timeout=0,
)

for idx, data in enumerate(train_dataloader):
    datas = data[0]
    labels = data[1]
    print("Datas shape:", datas.shape)
    print("Labels:", labels)
    print("Labels shape:", labels.shape)
    print("Labels[0] shape:", labels[0].shape)
    break
and the output is what you would expect:
Datas shape: torch.Size([6, 5, 3, 224, 224])
Labels: tensor([[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 1., 0., 0.]])
Labels shape: torch.Size([6, 4])
Labels[0] shape: torch.Size([4])
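As a side note (not part of the original answer), if you prefer to keep the plain list label in __getitem__, you can instead pass a custom collate_fn to the DataLoader and convert the labels during batching. A minimal sketch under the same PyTorch version assumption as above, where my_collate is a hypothetical helper name:

import torch
from torch.utils.data import DataLoader, default_collate

def my_collate(batch):
    # batch is a list of (image, label) tuples produced by __getitem__.
    images, labels = zip(*batch)
    # Stack the image tensors as usual, but build one (batch_size, 4) tensor
    # from the list labels instead of letting them be zipped elementwise.
    return default_collate(list(images)), torch.tensor(labels)

train_dataloader = DataLoader(
    train_dataset,  # the first dataset above, whose labels are plain lists
    batch_size=6,
    shuffle=True,
    collate_fn=my_collate,
)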