Currently, I'm working on an image motion deblurring problem with PyTorch. I have two kinds of images: blurry images (variable = blur_image), which are the input, and sharp versions of the same images (variable = sharp_image), which should be the output. Now I wanted to try out transfer learning, but I can't get it to work.
Here is the code for my dataloaders:
# Only the training split is shuffled; the validation and test loaders walk
# their datasets in order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
validation_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
Their shape:
Trainloader - Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Trainloader - Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
Validationloader - Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Validationloader - Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
Testloader- Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Testloader- Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
The way I use transfer learning (I thought that for the 'in_features' I have to put in the amount of pixels):
# Use the GPU when one is available, otherwise fall back to the CPU.
device_string = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_string)

# Load the pretrained AlexNet and replace its last classifier layer with a
# fresh linear layer that has 128 outputs.
# NOTE(review): the network therefore emits [N, 128] vectors, which cannot be
# compared element-wise against [N, 3, 128, 128] target images.
model = models.alexnet(pretrained=True)
model.classifier[6] = torch.nn.Linear(
    model.classifier[6].in_features,
    128,
)
model = model.to(device)
The way I define my training process:
# Loss function: MSE, chosen because the task compares the pixels of the
# restored image with those of the sharp image.
criterion = nn.MSELoss()

# Optimizer: Adam over all model parameters, initial learning rate 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning-rate schedule: when the monitored loss has not improved for 5
# scheduler steps in a row, the new rate becomes previous_rate * 0.5.
# Alternative kept for reference:
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5, verbose=True
)
def training(model, trainDataloader, epoch):
    """Train the model for one pass over the training set.

    Uses the module-level ``optimizer``, ``criterion``, ``device``, ``psnr``
    and ``ssim`` objects.

    Args:
        model (Model object): The model that is going to be trained.
        trainDataloader (Dataloader object): Dataloader object of the trainset.
            Every batch is indexed as ``(blur_image, sharp_image)``.
        epoch (Integer): Number of training epochs. Not used inside the
            function; kept so existing callers keep working.

    Returns:
        tuple: ``(trainings_loss, final_psnr, final_ssim)``, each one the
        average of the per-batch values of this pass.

    Raises:
        ValueError: If ``trainDataloader`` yields no batches.
    """
    # len(dataloader) is the real number of iterations (it includes a final
    # partial batch), unlike int(len(dataset) / batch_size).
    num_batches = len(trainDataloader)
    if num_batches == 0:
        raise ValueError("trainDataloader yields no batches")

    # Changing model into training mode
    model.train()

    # Running sums used to report per-epoch averages
    running_loss = 0.0
    running_psnr = 0.0
    running_ssim = 0.0

    for data in tqdm(trainDataloader, total=num_batches):
        # Transfer the blurred and sharp image batch to the device
        blur_image = data[0].to(device)
        sharp_image = data[1].to(device)

        # Sets the gradient of tensors to zero
        optimizer.zero_grad()
        outputs = model(blur_image)
        loss = criterion(outputs, sharp_image)
        # Perform backpropagation
        loss.backward()
        # Update the weights
        optimizer.step()

        # Add the loss that was calculated during this training step
        running_loss += loss.item()

        # Quality metrics must compare the *restored* image with the sharp
        # target; comparing against the blurry input would only measure the
        # dataset, not the model. detach() keeps the metrics out of autograd.
        restored = outputs.detach()
        running_psnr += psnr(sharp_image, restored)
        running_ssim = running_ssim + ssim(
            sharp_image, restored, data_range=1, size_average=True
        )

    # loss.item() is already a per-batch mean, so average over the batches
    trainings_loss = running_loss / num_batches
    final_psnr = running_psnr / num_batches
    final_ssim = running_ssim / num_batches

    # Display training results
    print(f"Trainings loss: {trainings_loss:.5f}")
    print(f"Train PSNR: {final_psnr:.5f}")
    print(f"Train SSIM: {final_ssim:.5f}")
    return trainings_loss, final_psnr, final_ssim
And here is my way to start the training:
# Per-epoch history of the loss and of the image-quality metrics.
train_loss = []
val_loss = []
train_PSNR_score = []
train_SSIM_score = []
val_PSNR_score = []
val_SSIM_score = []

start = time.time()
for epoch in range(nb_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    # Both helpers return a (loss, PSNR, SSIM) triple for the epoch.
    train_epoch_loss = training(model, train_loader, nb_epochs)
    val_epoch_loss = validation(model, validation_loader, nb_epochs)

    train_loss.append(train_epoch_loss[0])
    val_loss.append(val_epoch_loss[0])
    train_PSNR_score.append(train_epoch_loss[1])
    train_SSIM_score.append(train_epoch_loss[2])
    val_PSNR_score.append(val_epoch_loss[1])
    val_SSIM_score.append(val_epoch_loss[2])

    # Step the plateau scheduler exactly once per epoch and on a single
    # metric (the validation loss). Stepping it twice with two different
    # losses mixes the metrics and burns through the patience twice as fast.
    scheduler.step(val_epoch_loss[0])
end = time.time()
print(f"Took {((end-start)/60):.3f} minutes to train")
But every time when I want to perform the training I receive the following error:
0%| | 0/249 [00:00<?, ?it/s]Epoch 1
-------------------------------
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([16, 3, 128, 128])) that is different to the input size (torch.Size([16, 128])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-195-ff0214e227cd> in <module>()
9 for epoch in range(nb_epochs):
10 print(f"Epoch {epoch+1}\n-------------------------------")
---> 11 train_epoch_loss = training(model, train_loader, nb_epochs)
12 val_epoch_loss = validation(model, validation_loader, nb_epochs)
13 train_loss.append(train_epoch_loss[0])
<ipython-input-170-dfa2c212ad23> in training(model, trainDataloader, epoch)
25 optimizer.zero_grad()
26 outputs = model(blur_image)
---> 27 loss = criterion(outputs, sharp_image)
28
29 # Perform backpropagation
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
526
527 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 528 return F.mse_loss(input, target, reduction=self.reduction)
529
530
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in mse_loss(input, target, size_average, reduce, reduction)
2926 reduction = _Reduction.legacy_get_string(size_average, reduce)
2927
-> 2928 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
2929 return torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
2930
/usr/local/lib/python3.7/dist-packages/torch/functional.py in broadcast_tensors(*tensors)
72 if has_torch_function(tensors):
73 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 74 return _VF.broadcast_tensors(tensors) # type: ignore
75
76
RuntimeError: The size of tensor a (16) must match the size of tensor b (128) at non-singleton dimension 2
model structure:
AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=128, bias=True)
)
)
I'm a newbie in terms of using PyTorch (and image deblurring in general), so I'm rather confused about the meaning of the error message and how to fix it. I tried changing my parameters, but nothing worked. Does anyone have any advice for me on how to solve this problem?
I would appreciate every input :)
You can't use AlexNet as-is for this task, because the output of your model and sharp_image must have the same shape. A convnet encodes your image as embeddings, and the fully connected layers at the end cannot convert those embeddings back into an image of the original size, so you cannot use fully connected layers for decoding. To obtain an output with the same size as the input, you need to upsample with ConvTranspose2d().
your encoder should be:
class ConvEncoder(nn.Module):
    """A simple convolutional encoder.

    Four stages, each a 3x3 convolution (padding 1), an in-place ReLU and a
    2x2 max-pool. Every stage halves the spatial size, and the channel count
    grows in_channels -> 16 -> 32 -> 64 -> 128, so a [N, 3, 128, 128] batch
    becomes [N, 128, 8, 8].

    Args:
        in_channels (int): Number of channels of the input images.
            Defaults to 3.
    """

    def __init__(self, in_channels=3):
        super().__init__()
        # Stage 1: in_channels -> 16 channels
        self.conv1 = nn.Conv2d(in_channels, 16, (3, 3), padding=(1, 1))
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d((2, 2))
        # Stage 2: 16 -> 32 channels
        self.conv2 = nn.Conv2d(16, 32, (3, 3), padding=(1, 1))
        self.relu2 = nn.ReLU(inplace=True)
        self.maxpool2 = nn.MaxPool2d((2, 2))
        # Stage 3: 32 -> 64 channels
        self.conv3 = nn.Conv2d(32, 64, (3, 3), padding=(1, 1))
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool3 = nn.MaxPool2d((2, 2))
        # Stage 4: 64 -> 128 channels
        self.conv4 = nn.Conv2d(64, 128, (3, 3), padding=(1, 1))
        self.relu4 = nn.ReLU(inplace=True)
        self.maxpool4 = nn.MaxPool2d((2, 2))

    def forward(self, x):
        """Downscale the image batch ``x`` and return its feature map."""
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.maxpool2(self.relu2(self.conv2(x)))
        x = self.maxpool3(self.relu3(self.conv3(x)))
        x = self.maxpool4(self.relu4(self.conv4(x)))
        return x
And your decoder should be:
class ConvDecoder(nn.Module):
    """A simple convolutional decoder.

    Four transposed convolutions with kernel 2 and stride 2, each followed by
    an in-place ReLU. Every stage doubles the spatial size, and the channel
    count shrinks in_channels -> 64 -> 32 -> 16 -> out_channels. With the
    defaults, a [N, 128, 8, 8] feature map (the shape ConvEncoder produces
    for 128x128 images) becomes a [N, 3, 128, 128] image batch, which matches
    the shape of the target images.

    Args:
        in_channels (int): Channels of the incoming feature map. Defaults to
            128, the number of channels ConvEncoder emits.
        out_channels (int): Channels of the reconstructed image. Defaults to
            3 (RGB).
    """

    def __init__(self, in_channels=128, out_channels=3):
        super().__init__()
        self.deconv1 = nn.ConvTranspose2d(in_channels, 64, (2, 2), stride=(2, 2))
        self.relu1 = nn.ReLU(inplace=True)
        self.deconv2 = nn.ConvTranspose2d(64, 32, (2, 2), stride=(2, 2))
        self.relu2 = nn.ReLU(inplace=True)
        self.deconv3 = nn.ConvTranspose2d(32, 16, (2, 2), stride=(2, 2))
        self.relu3 = nn.ReLU(inplace=True)
        # The last layer maps back to image channels so the output can be
        # compared with the target image by the loss function.
        self.deconv4 = nn.ConvTranspose2d(16, out_channels, (2, 2), stride=(2, 2))
        # The final ReLU keeps the reconstructed pixel values non-negative.
        self.relu4 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Upscale the feature map ``x`` and return the reconstructed images."""
        x = self.relu1(self.deconv1(x))
        x = self.relu2(self.deconv2(x))
        x = self.relu3(self.deconv3(x))
        x = self.relu4(self.deconv4(x))
        return x
# Build both halves of the autoencoder on the same device the training loop
# moves the image batches to; otherwise the forward pass fails with a device
# mismatch when running on the GPU.
# NOTE: the optimizer used for training has to be created from the parameters
# of BOTH modules, e.g.
#   optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=0.001)
encoder = ConvEncoder().to(device)
decoder = ConvDecoder().to(device)
You can train your model like that:
def train_step(encoder, decoder, train_loader, loss_fn, optimizer, device):
    """Run one training pass of the encoder/decoder pair over ``train_loader``.

    Args:
        encoder: Module that maps an image batch to a feature map.
        decoder: Module that maps the encoder output back to an image batch.
        train_loader: Iterable of ``(train_img, target_img)`` tensor pairs.
        loss_fn: Callable ``loss_fn(reconstruction, target)`` returning a
            scalar tensor.
        optimizer: Optimizer holding the parameters that should be updated
            (normally those of both ``encoder`` and ``decoder``).
        device: Device the image batches are moved to. The two modules are
            expected to live on this device already.

    Returns:
        float: The loss of the last processed batch.

    Raises:
        ValueError: If ``train_loader`` yields no batches.

    Example:
        loss = train_step(encoder, decoder, train_loader,
                          nn.MSELoss(), optimizer, device)
    """
    # Put both modules into training mode
    encoder.train()
    decoder.train()

    loss = None
    for train_img, target_img in train_loader:
        # Move images to device
        train_img = train_img.to(device)
        target_img = target_img.to(device)

        # Zero grad the optimizer
        optimizer.zero_grad()

        # Feed the train images to the encoder; its output is the input of
        # the decoder, whose output is the reconstructed image
        enc_output = encoder(train_img)
        dec_output = decoder(enc_output)

        # Compute the loss between the reconstruction and the target image
        loss = loss_fn(dec_output, target_img)
        # Backpropagate
        loss.backward()
        # Apply the optimizer to the network by calling step
        optimizer.step()

    if loss is None:
        raise ValueError("train_loader yielded no batches")

    # Return the loss of the final batch
    return loss.item()
You might want to visit this for help with your project.