I created a Dataset in PyTorch to load images and feed them to a CNN. In the dataset's __getitem__ method, the images are processed with a transform function (normalization with the ImageNet mean and std values) before being returned.
I noticed that if I revert the transformed image inside __getitem__ itself, I get back the original image as expected. However, if I do the same with one of the images returned by the DataLoader, the image looks different. Isn't the DataLoader just stacking the transformed images together?
This is a minimal example of the code I'm using (it just returns copies of a single image, whose local path you have to specify):
import cv2
import numpy as np
import torch

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, img_path):
        # cv2 loads BGR, so flip the channel axis to get RGB
        self.img = cv2.imread(img_path)[:, :, ::-1]

    def __len__(self):
        return 100

    def transform(self, img: np.ndarray, data_type=np.float32):
        # normalize with the ImageNet per-channel mean and std
        img = img.astype(data_type) / 255
        img[:, :, 0] = (img[:, :, 0] - 0.485) / 0.229
        img[:, :, 1] = (img[:, :, 1] - 0.456) / 0.224
        img[:, :, 2] = (img[:, :, 2] - 0.406) / 0.225
        return img

    def __getitem__(self, idx):
        images_raw = [self.img] * 5
        # transform the images and stack them into one tensor
        images_transformed = np.array([self.transform(img) for img in images_raw])
        images_transformed = torch.tensor(images_transformed)
        # test: reconvert the first image and save it here
        image_reconverted = transform_reverse(images_transformed[0].numpy())
        cv2.imwrite("puppy_00_re_transformed_in_dataloader.jpg", image_reconverted[:, :, ::-1])
        return images_transformed

def main():
    img_path = "puppy.jpg"
    custom_dataset = CustomDataset(img_path)
    dataloader = torch.utils.data.DataLoader(custom_dataset, batch_size=10, shuffle=True, drop_last=True)
    for batch_data in dataloader:
        # take one of the images returned by the DataLoader and reverse the transform
        re_converted_img = transform_reverse(batch_data[0][0].numpy())
        cv2.imwrite("puppy_01_re_transformed_in_main.jpg", re_converted_img[:, :, ::-1])

def transform_reverse(img):
    # undo the normalization (note: these assignments operate on img in place)
    img[:, :, 0] = img[:, :, 0] * 0.229 + 0.485
    img[:, :, 1] = img[:, :, 1] * 0.224 + 0.456
    img[:, :, 2] = img[:, :, 2] * 0.225 + 0.406
    img = np.round(img * 255).astype(np.uint8)
    return img

if __name__ == "__main__":
    main()
As a reference, this is the original image:
[original image omitted]
while this is the one after reversing the transformation in main:
[re-transformed image omitted; it looks visibly different from the original]
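It turned out the DataLoader itself was not the culprit: its default collate_fn really does just stack the per-item tensors. The problem is that transform_reverse modifies its argument in place, and Tensor.numpy() returns a view that shares memory with the tensor. The test reconversion inside __getitem__ therefore denormalizes images_transformed[0] in the tensor itself, before the DataLoader ever returns it; the reconversion in main then denormalizes that image a second time, which is why it looks different. A minimal standalone sketch of the memory-sharing behaviour (not part of the original code):

import torch

t = torch.zeros(3)
a = t.numpy()  # a is a view that shares memory with t
a += 1.0       # in-place NumPy update
print(t)       # tensor([1., 1., 1.]) -- the tensor changed too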
I solved this by creating a copy of the image in the transform and transform_reverse functions and applying the operations to the copy:
def transform(self, img: np.ndarray, data_type=np.float32):
    # operate on a copy so the caller's array is never modified
    img_copy = img.copy()
    img_copy = img_copy.astype(data_type) / 255
    img_copy[:, :, 0] = (img_copy[:, :, 0] - 0.485) / 0.229
    img_copy[:, :, 1] = (img_copy[:, :, 1] - 0.456) / 0.224
    img_copy[:, :, 2] = (img_copy[:, :, 2] - 0.406) / 0.225
    return img_copy
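For completeness, here is the same pattern applied to transform_reverse; the post above only shows the fixed transform, so this is a sketch following the same idea:

def transform_reverse(img):
    # work on a copy so the tensor's underlying buffer is left untouched
    img_copy = img.copy()
    img_copy[:, :, 0] = img_copy[:, :, 0] * 0.229 + 0.485
    img_copy[:, :, 1] = img_copy[:, :, 1] * 0.224 + 0.456
    img_copy[:, :, 2] = img_copy[:, :, 2] * 0.225 + 0.406
    return np.round(img_copy * 255).astype(np.uint8)

With this change, the image saved in main matches the one saved inside __getitem__, and the batch tensor coming out of the DataLoader stays normalized.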