Tags: optimization, pytorch, loss

PyTorch optimizer does not update the free parameters when backpropagating the loss


I set up two free parameters as the two standard deviations of a 2D Gaussian. There is a true distribution with known (true) deviations, and I want to optimize the free parameters to match it; the mean of the distribution is fixed, of course.

The loss is computed correctly, but the free parameters are never updated.

Could you please help me?

Here's the code

import torch
import torch.optim as optim
import numpy as np
from scipy.stats import multivariate_normal
from torch.distributions.multivariate_normal import MultivariateNormal

# Free parameters to optimize (standard deviations)
sigma_x = torch.tensor(1.0, requires_grad=True)
sigma_y = torch.tensor(1.0, requires_grad=True)

# Generate a numpy grid for the true distribution
x, y = np.meshgrid(np.arange(1, 12), np.arange(1, 12))

# Set up a single subgoal
subgoal = (3, 5)  # Adjust as needed

# Convert subgoal to torch tensor
subgoal = torch.tensor(subgoal, dtype=torch.float32)

# True distribution parameters
true_sigma_x = 2.0
true_sigma_y = 1.5

# Target distribution (true distribution), computed with scipy and normalized

mean = subgoal
cov_matrix = np.array([[true_sigma_x**2, 0], [0, true_sigma_y**2]])
true_distribution = multivariate_normal.pdf(np.stack([x.flatten(), y.flatten()]).T, mean=mean, cov=cov_matrix)
true_distribution = true_distribution.reshape(x.shape) / np.max(true_distribution)
true_distribution = torch.tensor(true_distribution)

# Define MSE loss function
def mse_loss(estimated_distribution, true_distribution):
    return torch.mean((estimated_distribution - true_distribution)**2)

# Set up optimizer
optimizer = optim.Adam([sigma_x, sigma_y], lr=0.01)

# Torch grid used inside the loss function
x, y = torch.meshgrid(torch.arange(0, 11), torch.arange(0, 11))
# Training loop
num_epochs = 1000  # Adjust as needed

def loss(sigmas, true_distribution):
    # Calculate the estimated distribution with the current sigmas
    mean = subgoal
    coords = torch.stack([x.flatten(), y.flatten()], dim=1)
    cov_matrix = torch.tensor([[sigmas[0]**2, 0], [0, sigmas[1]**2]], requires_grad=True)
    estimated_distribution = MultivariateNormal(mean, covariance_matrix=cov_matrix)
    estimated_dist = torch.exp(estimated_distribution.log_prob(coords).requires_grad_(True)).reshape(x.shape).requires_grad_(True)
    # Normalize the estimated distribution for the MSE
    estimated_distribution = estimated_dist / torch.max(estimated_dist)
    mse = mse_loss(estimated_distribution, true_distribution)
    return mse

for epoch in range(num_epochs):
    # Compute loss
    total_loss = loss([sigma_x,sigma_y], true_distribution)

    # Optimization step
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Print loss every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss.item()}')

# Print optimized sigmas
print(f'Optimized Sigma_x: {sigma_x.item()}, Optimized Sigma_y: {sigma_y.item()}')



Epoch 0, Loss: 0.07415312548569078
Epoch 100, Loss: 0.07415312548569078
Epoch 200, Loss: 0.07415312548569078
Epoch 300, Loss: 0.07415312548569078
Epoch 400, Loss: 0.07415312548569078
Epoch 500, Loss: 0.07415312548569078
Epoch 600, Loss: 0.07415312548569078
Epoch 700, Loss: 0.07415312548569078
Epoch 800, Loss: 0.07415312548569078
Epoch 900, Loss: 0.07415312548569078
Optimized Sigma_x: 1.0, Optimized Sigma_y: 1.0

I want to figure out what is interrupting the backpropagation.


Solution

  • cov_matrix = torch.tensor([[sigmas[0]**2, 0], [0, sigmas[1]**2]], requires_grad=True)

    This line is what breaks the gradient flow. torch.tensor() copies the values of sigmas[0] and sigmas[1] into a brand-new leaf tensor, so cov_matrix is detached from the computation graph; setting requires_grad=True on the copy only makes the copy trainable, it does not reconnect it to sigma_x and sigma_y (the extra requires_grad_(True) calls later in the function do not help for the same reason). To let the gradient flow back to your parameters, build the covariance matrix with differentiable tensor operations instead: cov_matrix = torch.diag(torch.stack(sigmas).pow(2))
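Here is a minimal sketch of the corrected loss function under the same setup as the question (it reuses subgoal, the torch meshgrid x, y, and mse_loss defined there); the only substantive change is the covariance construction, the coords are cast to float to match the distribution's dtype, and the redundant requires_grad_(True) calls are dropped because they are no longer needed:

def loss(sigmas, true_distribution):
    coords = torch.stack([x.flatten(), y.flatten()], dim=1).float()
    # Covariance built from the parameter tensors themselves, so the graph
    # connects the loss back to sigma_x and sigma_y
    cov_matrix = torch.diag(torch.stack(sigmas).pow(2))
    estimated_distribution = MultivariateNormal(subgoal, covariance_matrix=cov_matrix)
    estimated_dist = torch.exp(estimated_distribution.log_prob(coords)).reshape(x.shape)
    # Normalize for the MSE, as in the original code
    estimated_dist = estimated_dist / torch.max(estimated_dist)
    return mse_loss(estimated_dist, true_distribution)

A quick sanity check: after total_loss.backward(), sigma_x.grad and sigma_y.grad should no longer be None, and the printed loss should now decrease over the epochs.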