Consider a regression task where the parameters of the model differ significantly in magnitude, say:
def func(x, p):
    p1, p2, p3 = p
    return np.sin(p1*x) * np.exp(p2*x) * p3
# True Parameters:
p1, p2, p3 = np.pi/0.01, -1.25, 1.2356 # 314.1592, -1.25, 1.2356
The fit requires a sufficiently good initial guess to converge to the correct solution, for example:
p0 = [np.pi/0.01-80, -1.25-1, 1.2356-1]
However, if the initial guess is too far from the true solution, the regression may fail to converge. For example, the following initial guess might not work:
p0 = [np.pi/0.02-10, -1.25-1, 1.2356-1]
A reproducible example is as follows:
import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
def func(x, p):
    p1, p2, p3 = p
    return np.sin(p1*x) * np.exp(p2*x) * p3

def residuals(p, y, x):
    return y - func(x, p)
x = np.linspace(0, 0.05, 50)
p1, p2, p3 = np.pi/0.01, -1.25, 1.2356
y0 = func(x, [p1, p2, p3])
y = y0 + np.random.randn(len(x)) * 0.05
p0 = [np.pi/0.02-10, -1.25-1, 1.2356-1]
result = least_squares(residuals, p0, args=(y, x))
print("True parameters:", [p1, p2, p3])
print("Approximated parameters:", result.x)
x_test = np.linspace(0, 0.05, 200)
y_test = func(x_test, result.x)
y_real = func(x_test, [p1, p2, p3])
plt.plot(x_test, y_test, label="predict")
plt.plot(x_test, y_real, label="true")
plt.plot(x, y, '.r', label="data")
plt.legend()
plt.show()
Or here is a PyTorch version:
import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
class guessEq(nn.Module):
    def __init__(self):
        super(guessEq, self).__init__()
        self.params = nn.Parameter(torch.tensor([np.pi/0.01-10, -1.25+1, 1.2356-1]))

    def forward(self, x):
        out = torch.sin(self.params[0]*x) * \
              torch.exp(self.params[1]*x) * \
              self.params[2]
        return out
x = np.linspace(0, 0.05, 100)
y = np.sin(np.pi/0.01*x) * np.exp(-1.25*x) * 1.2356 + np.random.rand(x.shape[0]) * 0.05
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)
x = x.reshape((-1, 1))
y = y.reshape((-1, 1))
model = guessEq()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
mse = nn.MSELoss()
for i in range(2000):
    optimizer.zero_grad()
    y_pred = model(x)
    loss = torch.mean(torch.square(y_pred - y))
    loss.backward()
    if i % 100 == 0:
        print(loss)
    optimizer.step()
x_test = torch.linspace(0, 0.05, 200).reshape((-1, 1))
y_test = model(x_test)
print("True parameters: [{} {} {}]".format(np.pi/0.01, -1.25, 1.2356))
print("Approximated parameters: {}".format(model.params.detach().numpy()))
plt.plot(x_test.detach().cpu().numpy().flatten(), y_test.detach().cpu().numpy().flatten(), c="blue", linewidth=2, label="predict")
plt.plot(x.detach().cpu().numpy().flatten(), y.detach().cpu().numpy().flatten(), '.r', label="train")
plt.legend()
plt.show()
How can this issue be addressed when the initial guess is far from the true solution, or when there is no prior knowledge to guide the initial guess?
Since the optimizer most likely gets stuck in a local minimum, and local optimization algorithms are very sensitive to the initial guess, we can restart from random initializations multiple times and keep the solution with the minimum cost over all runs. This multi-start strategy increases the chance of finding the global minimum, as shown below (here the global minimum was found within 20 runs):
def residuals(p, y, x):
    return (y - func(x, p))**2  # squared error
np.random.seed(10)  # for a reproducible result

num_runs = 20
best_init = None
best_cost = np.inf
best_result = None
p0 = np.zeros(3)

for i in range(num_runs):
    p0 = np.random.normal(size=len(p0))  # random init; any other distribution can be used
    result = least_squares(residuals, p0, args=(y, x), method='lm', verbose=1)
    # print(result.cost)
    if best_cost > result.cost:
        best_init = p0
        best_cost = result.cost
        best_result = result

print(best_init, best_cost)
# [ 0.91745894 -0.11227247 -0.36218045] 0.00041268250199614506

result = best_result
We can also try the different methods (trf, dogbox, lm) available for least squares, and explicitly provide the Jacobian, to see whether that gives better results.
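For instance, here is a minimal sketch of passing an analytic Jacobian to least_squares and comparing the three methods. It is written for the plain residuals y - func(x, p) (not the squared version above), and the names plain_residuals and jac_residuals are just illustrative choices:

def plain_residuals(p, y, x):
    return y - func(x, p)

def jac_residuals(p, y, x):
    # Analytic Jacobian of r = y - p3*sin(p1*x)*exp(p2*x)
    # with respect to (p1, p2, p3); shape (len(x), 3).
    p1, p2, p3 = p
    J = np.empty((x.size, 3))
    J[:, 0] = -p3 * x * np.cos(p1*x) * np.exp(p2*x)  # dr/dp1
    J[:, 1] = -p3 * x * np.sin(p1*x) * np.exp(p2*x)  # dr/dp2
    J[:, 2] = -np.sin(p1*x) * np.exp(p2*x)           # dr/dp3
    return J

for method in ("trf", "dogbox", "lm"):
    res = least_squares(plain_residuals, p0, jac=jac_residuals, args=(y, x), method=method)
    print(method, res.cost)

The resulting res.cost values can then be compared across methods (and across random restarts), in the same way as in the multi-start loop above.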