I have the following code which provides the best fit curve for my given data. I am not sure how to bootstrap it and use that to find the 95% confidence interval.
This is my code:
import numpy as np
import matplotlib.pyplot as plt
import pylab as plb
from scipy.optimize import curve_fit
data = np.loadtxt('gaussian.dat')
x = data[:, 0]
y = data[:, 1]
n = len(x)
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n
def gauss(x,a,x0,sigma):
return a*np.exp(-(x-x0)**2/(2*sigma**2))
popt,pcov = curve_fit(gauss,x,y,p0=[1,mean,sigma])
plt.plot(x,y,'b+:',label='data')
plt.plot(x,gauss(x,*popt),'ro:',label='fit')
plt.legend()
plt.title('Gaussian Fit vs Actual Data')
plt.xlabel('x-values')
plt.ylabel('y-values')
plt.show()
Let's say that x, y are generated from the gauss with an error:
# -- generating custom random dataset
# skip this part if you are using data from the file
x = np.arange(-100,100,step=0.1)
a = 2.4
x0 = 0.1
sigma = 3
def gauss(x,a,x0,sigma):
return a*np.exp(-(x-x0)**2/(2*sigma**2))
# y is a gauss process with random error introduced from the measurement
y = gauss(x, a, x0, sigma) + 0.1 * np.random.normal(size = x.shape[0])
# <--- end of generating dataset
# fitting the curve
n = len(x)
mean = sum(x*y)/n
sigma = sum(y*(x-mean)**2)/n
popt,pcov = curve_fit(gauss,x,y,p0=[1,mean,sigma])
Then to generate confidence interval in any point (for example in x=0.2) you can sample parameters, apply gauss function with this parameters and get y in this point. Then get 2.5% and 97.5% quantile to plot 95% confidence interval:
def bootstrap_gauss(x: float, popt: np.ndarray, pcov: np.ndarray):
arr_bootstrap_y = []
# bootstrapping parameters 100 times
for t in range(100):
a, mean, sigma = np.random.multivariate_normal(popt, pcov)
y = gauss(x,a,x0,sigma)
arr_bootstrap_y.append(y)
return arr_bootstrap_y
conf_interval = bootstrap_gauss(0.2, popt, pcov)
# 95% confidence interval
q1, q2 = np.quantile(conf_interval, [0.025,0.975])
print("Confidence interval:", q1, q2)
The remaining question is how to vectorize this calculations to make it more efficient...
def get_conf_gaus(x: float,
popt: np.ndarray,
pcov: np.ndarray,
n_boostrap:int = 100):
params = np.random.multivariate_normal(popt, pcov, size = [n_boostrap])
a = params[:,0]
mu = params[:,1]
sigma = params[:,2]
bootstrap_y = gauss(0.2,a,mu,sigma)
conf = np.quantile(bootstrap_y, [0.025,0.975])
return conf
conf = get_conf_gaus(0.2, popt, pcov)
# outputs [2.37079969, 2.41727759]