I am performing a weighted cubic spline fit to some data x, y, and errors on y. I then want to interpolate other values and their errors along that curve. I am having a very difficult time figuring out how to do the error propagation on each interpolated point and could use some help.
My current approach:
import sys
import numpy as np
from PySide6.QtWidgets import QApplication, QMainWindow, QVBoxLayout, QWidget
from matplotlib.backends.backend_qtagg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from scipy.interpolate import splrep, splev
class MplCanvas(FigureCanvas):
    def __init__(self, parent=None, width=5, height=4, dpi=100):
        fig = Figure(figsize=(width, height), dpi=dpi)
        self.axes = fig.add_subplot(111)
        super().__init__(fig)

class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        self.canvas = MplCanvas(self, width=5, height=4, dpi=100)
        layout = QVBoxLayout()
        layout.addWidget(self.canvas)
        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)
        self.plot()

    def plot(self):
        # Generate random data with errors
        np.random.seed(0)
        x = np.linspace(0, 10, 10)
        y = np.sin(x) + np.random.normal(0, 0.1, len(x))
        y_err = np.random.normal(0.1, 0.02, len(x)) + 0.5
        # Fit a weighted cubic spline using splrep
        tck = splrep(x, y, w=1/y_err, k=3)
        # Interpolate values using splev
        x_interp = np.linspace(0, 10, 100)
        y_interp = splev(x_interp, tck)
        # Calculate the fitted values at the data points (for residuals)
        y_fit = splev(x, tck)
        # Calculate residuals
        residuals = y - y_fit
        # Calculate chi-squared and degrees of freedom
        chi2 = np.sum((residuals / y_err) ** 2)
        dof = len(x) - len(tck[1]) // 3 - 1
        # Covariance matrix for weighted least squares
        cov_matrix = np.diag(y_err ** 2) * chi2 / dof
        # Compute Jacobians at interpolated points
        jacobian = np.array([splev(xi, tck, der=1) for xi in x_interp])
        # Extract the diagonal elements of the covariance matrix (variances)
        variances = np.diag(cov_matrix)
        # Convert variances to column vector for broadcasting
        variances_column = variances[:, np.newaxis]
        # Propagate errors through the squared Jacobian and sum up
        jacobian_squared = jacobian ** 2
        weighted_jacobian = jacobian_squared * variances_column
        sum_weighted_jacobian = np.sum(weighted_jacobian, axis=0)
        # Calculate the final interpolated errors
        y_interp_err = np.sqrt(sum_weighted_jacobian)
        # Plot the results
        self.canvas.axes.errorbar(x, y, yerr=y_err, fmt='o', label='Data')
        self.canvas.axes.plot(x_interp, y_interp, label='Cubic Spline Fit')
        self.canvas.axes.fill_between(x_interp, y_interp - y_interp_err, y_interp + y_interp_err, alpha=0.2, label='Error')
        self.canvas.axes.legend()
        self.canvas.draw()
app = QApplication(sys.argv)
window = MainWindow()
window.show()
sys.exit(app.exec())
This generates points where the errors are zero, which looks incorrect. What am I doing wrong?
It seems you are trying to propagate y errors using the gradient with respect to the x variable, Jx = dy/dx, to estimate y errors. This is somewhat circular, and it is at least dimensionally incorrect.
The reason your "error" function goes to zero is simply that the gradient vanishes wherever the model has zero slope (you can check visually that the zeros occur exactly where the model slope is null).
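You can confirm this with the variables already computed in your plot() method; a minimal check, assuming x_interp, tck and y_interp_err are still in scope:

# The propagated "error" vanishes exactly where the spline slope does,
# because y_interp_err is proportional to |dy/dx|.
slope = splev(x_interp, tck, der=1)   # model slope along the curve
i = np.argmin(np.abs(slope))          # point of (near-)zero slope
print(slope[i], y_interp_err[i])      # both are ~0 at the same x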
What you are probably looking for is parameter error propagation (see this reference). In this scenario we propagate the parameter uncertainty using the gradient Jp with the classical formula Cy = Jp @ Cp @ Jp.T. That is, it provides the uncertainty on y based on error propagation of the parameters p through our model.
Unfortunately this is not straightforward using BSpline, but we can perform it with curve_fit, which provides the parameter covariance estimate alongside the fitted parameters. I leave an MCVE below to show how to perform such a parametric propagation.
If you are looking for a non-parametric procedure, you should probably have a look at Gaussian Process Regression, which is nicely implemented in sklearn (a minimal sketch is given at the end of this answer).
import numpy as np
import numdifftools as nd
from scipy import interpolate, optimize, stats
import matplotlib.pyplot as plt
We create the model with some ad hoc parameters:
def model(x, a, b, c):
    return a * np.sin(b * x) + c
We generate some synthetic dataset with known uncertainties:
np.random.seed(0)
m = 10
x = np.linspace(0, 10, m)
s = 0.5 * np.ones(x.size)
n = s * np.random.normal(size=x.size)
p = (1., 1., 0.)
y = model(x, *p)
yn = y + n
We assess the exact model:
xlin = np.linspace(x.min(), x.max(), 200)
ylin = model(xlin, *p)
We can smooth the dataset using BSpline (for comparison's sake):
tck = interpolate.splrep(x, yn, w=1/s, k=3)
spline = interpolate.BSpline(*tck)
yspl = spline(xlin)
We adjust the model with curve_fit (absolute_sigma=True treats sigma as absolute 1-σ errors, so pcov is not rescaled by the reduced chi-squared):
popt, pcov = optimize.curve_fit(model, x, yn, sigma=s, absolute_sigma=True)
yhat = model(xlin, *popt)
Then, using the parameter covariance matrix, we can assess the error on y:
def variance(model, x, p, Cp):
    # Gradient of the model with respect to its parameters only:
    def proxy(q):
        return model(x, *q)
    # Row-wise projection Cy[i] = Jp[i] @ Cp @ Jp[i].T:
    def projection(J):
        return J @ Cp @ J.T
    Jp = nd.Gradient(proxy)(p)  # shape (len(x), len(p))
    Cy = np.apply_along_axis(projection, 1, Jp)
    return Cy
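As a side note, the row-wise projection above can be written as a single einsum, which avoids apply_along_axis; a sketch, assuming Jp has shape (n_points, n_parameters):

# Equivalent vectorized form of the projection:
# Cy[i] = Jp[i] @ Cp @ Jp[i].T for each evaluation point i.
Cy = np.einsum("ij,jk,ik->i", Jp, Cp, Jp)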
Finally, we assess the confidence bands:
alpha = 0.01
z = stats.norm.ppf(1 - alpha / 2.)  # two-sided quantile, z ≈ 2.576 for alpha = 0.01
Cy = variance(model, xlin, popt, pcov)
sy = np.sqrt(Cy)
ci = z * sy
Plotting everything to check:
fig, axe = plt.subplots()
axe.errorbar(x, yn, yerr=s, linestyle="none", marker=".", label="Data")
axe.plot(xlin, ylin, label="Model")
axe.plot(xlin, yspl, label="BSpline")
axe.plot(xlin, yhat, label="NLLS")
axe.fill_between(xlin, yhat - ci, yhat + ci, alpha=0.2, label="CI 1 %")
axe.set_title("Error Propagation")
axe.set_xlabel("Variable $x$")
axe.set_ylabel("Variable $y$")
axe.legend()
axe.grid()
plt.show()
In the resulting plot, the confidence bands belong to the NLLS curve.
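For completeness, here is a minimal sketch of the non-parametric alternative mentioned above (Gaussian Process Regression), reusing x, yn, s, xlin and z from the MCVE; the kernel and its hyperparameters are illustrative choices, not prescriptions:

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

# Known heteroscedastic noise enters through alpha (per-point variances):
kernel = ConstantKernel(1.0) * RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=s**2, normalize_y=True)
gpr.fit(x.reshape(-1, 1), yn)

# The posterior provides both the mean and its standard deviation,
# so the confidence band comes for free:
ygp, sgp = gpr.predict(xlin.reshape(-1, 1), return_std=True)
cigp = z * sgp

Unlike the parametric approach, this requires no model function, and the uncertainty grows naturally away from the data points.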