pythonpytorchpytorch-dataloader

Reverting pre-processing step on image returned by Pytorch Dataloader leads to unexpected results


I created a Dataset in Pytorch to load images and feed them to a CNN. In the dataset __getitem__ method, images are processed using a transform function (normalization on Imagenet avg values) before being returned.

I noticed that, if I revert the transformed image inside the dataloader itself, I get back the original image as expected. However, if I do the same with one of the images returned from the dataloader, the image looks different. Isn't the dataloader just stacking the transformed images together?

This is an example of the code I'm using (just returning copies of an image for which you have to specify the local path):

import cv2
import numpy as np
import torch


class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, img_path):
        # RGB image
        self.img = cv2.imread(img_path)[:, :, ::-1]

    def __len__(self):
        return 100

    def transform(self, img: np.array, data_type=np.float32):
        img = img.astype(data_type) / 255
        img[:, :, 0] = (img[:, :, 0] - 0.485) / 0.229
        img[:, :, 1] = (img[:, :, 1] - 0.456) / 0.224
        img[:, :, 2] = (img[:, :, 2] - 0.406) / 0.225
        return img

    def __getitem__(self, idx):
        images_raw = [self.img] * 5

        # transform images
        images_transformed = np.array([self.transform(img) for img in images_raw])
        images_transformed = torch.tensor(images_transformed)

        # test reconverting image and saving it here
        image_reconverted = transform_reverse(images_transformed[0].numpy())
        cv2.imwrite("puppy_00_re_tranformed_in_dataloader.jpg", image_reconverted[:, :, ::-1])

        return images_transformed


def main():
    img_path = "puppy.jpg"

    custom_dataset = CustomDataset(img_path)
    dataloader = torch.utils.data.DataLoader(custom_dataset, batch_size=10, shuffle=True, drop_last=True)

    for batch_data in dataloader:
        # take one of the images returned from Dataloader and apply reverse transform
        re_converted_img = transform_reverse(batch_data[0][0].numpy())
        cv2.imwrite("puppy_01_re_transformed_in_main.jpg", re_converted_img[:, :, ::-1])


def transform_reverse(img):
    img[:, :, 0] = img[:, :, 0] * 0.229 + 0.485
    img[:, :, 1] = img[:, :, 1] * 0.224 + 0.456
    img[:, :, 2] = img[:, :, 2] * 0.225 + 0.406
    img = np.round(img * 255).astype(np.uint8)
    return img


if __name__ == "__main__":
    main()

As a reference, this is the original image:

original

while this is the one after reversing the transformation in main:

enter image description here


Solution

  • I solved this by creating a copy of the image in the transform and transform_reverse functions, and applying the operations on the copy

    def transform(self, img: np.array, data_type=np.float32):
        img_copy = img.copy()
        img_copy = img_copy.astype(data_type) / 255
        img_copy[:, :, 0] = (img_copy[:, :, 0] - 0.485) / 0.229
        img_copy[:, :, 1] = (img_copy[:, :, 1] - 0.456) / 0.224
        img_copy[:, :, 2] = (img_copy[:, :, 2] - 0.406) / 0.225
        return img_copy