I have the following code, which works correctly. However, instead of using curve_fit, I want to perform the fitting manually using scipy.optimize.minimize. Is it possible, and how can we do so?
This is the data file (gaussian.dat).
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
data = np.loadtxt('gaussian.dat')
x = data[:, 0]
y = data[:, 1]
# Rough initial guesses: y-weighted mean and standard deviation
mean = np.sum(x * y) / np.sum(y)
sigma = np.sqrt(np.sum(y * (x - mean)**2) / np.sum(y))

def gauss(x, a, x0, sigma):
    return a * np.exp(-(x - x0)**2 / (2 * sigma**2))

popt, pcov = curve_fit(gauss, x, y, p0=[1, mean, sigma])
def get_conf_gaus(x: float, popt: np.ndarray, pcov: np.ndarray, n_bootstrap: int = 100):
    # Sample parameter sets from the fitted multivariate normal
    params = np.random.multivariate_normal(popt, pcov, size=n_bootstrap)
    a = params[:, 0]
    mu = params[:, 1]
    sigma = params[:, 2]
    # Evaluate the model at x for every sampled parameter set
    bootstrap_y = gauss(x, a, mu, sigma)
    # 95% confidence interval from the bootstrap distribution
    conf = np.quantile(bootstrap_y, [0.025, 0.975])
    return conf
conf = get_conf_gaus(0.2, popt, pcov)
print(conf)
plt.plot(x,y,'b+:',label='data')
plt.plot(x,gauss(x,*popt),'ro:',label='fit')
plt.legend()
plt.title('Gaussian Fit vs Actual Data')
plt.xlabel('x-values')
plt.ylabel('y-values')
plt.show()
curve_fit
This method works well and returns, alongside the optimized parameters, an estimate of their covariance matrix:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats, optimize
import numdifftools as nd
x, y = np.genfromtxt('gaussian.dat').T
def model(x, A, mu, sigma):
    # Gaussian scaled so that A is the peak height at x = mu
    law = stats.norm(loc=mu, scale=sigma)
    return A * law.pdf(x) / law.pdf(mu)
popt, pcov = optimize.curve_fit(model, x, y)
#(array([9.85743077, 1.96427309, 1.27652119]),
# array([[ 2.92986068e-01, 6.62630072e-10, -2.52941845e-02],
# [ 6.62630072e-10, 6.55112141e-03, -9.75743530e-11],
# [-2.52941845e-02, -9.75743530e-11, 6.55112158e-03]]))
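As an aside, the one-sigma parameter uncertainties follow from the diagonal of pcov in the usual way (perr here is just an illustrative name):
# Standard errors of A, mu, sigma
perr = np.sqrt(np.diag(pcov))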
minimize
To perform the same operation with optimize.minimize, we must write the loss function for the least-squares problem, loss(p) = 0.5 * sum((y_i - model(x_i, p))**2). We choose to do it using a wrapper (a closure that captures the data):
def loss_factory(x, y):
    def wrapped(p):
        # Half of the residual sum of squares
        return 0.5 * np.sum(np.power(y - model(x, *p), 2))
    return wrapped
loss = loss_factory(x, y)
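As a quick sanity check, the loss evaluated at the curve_fit solution should already be close to the minimum that minimize finds below:
# Expect a value near 16.79, matching sol.fun reported below
loss(popt)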
Then we can minimize the loss (which now carries our data as enclosed variables):
sol = optimize.minimize(
    loss,
    x0=[1., 1., 1.],
    bounds=[(0, np.inf), (-np.inf, np.inf), (0, np.inf)]
)
# message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
# success: True
# status: 0
# fun: 16.793558341382255
# x: [ 9.857e+00 1.964e+00 1.277e+00]
# nit: 11
# jac: [ 1.741e-05 -6.537e-05 7.354e-05]
# nfev: 56
# njev: 14
# hess_inv: <3x3 LbfgsInvHessProduct with dtype=float64>
This is consistent with the curve_fit solution.
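We can check this programmatically; given the values printed above, the comparison should pass:
# Compare both solutions to three significant digits
np.allclose(popt, sol.x, rtol=1e-3)  # True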
Now, if we want to estimate the covariance matrix, we can compute the Hessian of the loss at the solution and invert it:
H = nd.Hessian(loss)(sol.x)
C = np.linalg.inv(H)
# array([[ 3.31906230e-01, -3.53666330e-04, -2.87276565e-02],
# [-3.53666330e-04, 7.41295793e-03, 9.16012115e-05],
# [-2.87276565e-02, 9.16012115e-05, 7.44036351e-03]])
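With sol.x and C in hand, they can be dropped straight into the question's get_conf_gaus helper in place of popt and pcov (a minimal sketch, reusing the helper exactly as defined above):
# Reuse the question's helper with the minimize-based estimates
conf = get_conf_gaus(0.2, sol.x, C)
print(conf)
As a cruder alternative, the L-BFGS-B solver carries its own inverse-Hessian approximation, available as sol.hess_inv.todense(), but it is generally a rougher estimate than the numdifftools one above.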
Both methods agree:
xlin = np.linspace(x.min(), x.max(), 200)
fig, axe = plt.subplots()
axe.scatter(x, y)
axe.plot(xlin, model(xlin, *popt))
axe.plot(xlin, model(xlin, *sol.x), "--")
axe.grid()
plt.show()