I would like to run scipy.interpolate.make_smoothing_spline() but adjust the automatically determined lam to make it more biased towards smoothing than preserving data, or conversely.
Is there any way, including by using private classes, to access the automatically estimated lam to be able to multiply it by a factor? I really want to use the GCV criterion (unless you prove me I'm on the wrong track).
I ended up reimplementing the make_smoothing_spline function, simply adding an optional bias_factor
argument.
If bias_factor=1: nothing changes
If bias_factor<1: the optimal smoothing parameters are biased towards more fidelity to data
If bias_factor>1: the optimal smoothing parameters are biased towards more smoothing
from scipy.interpolate._bsplines import _coeff_of_divided_diff, _compute_optimal_gcv_parameter
from scipy._lib._array_api import array_namespace
from scipy.interpolate import BSpline
from scipy.linalg import solve_banded
from numpy.core.multiarray import normalize_axis_index
def make_smoothing_spline_with_bias(x, y, bias_factor=1.0, w=None, lam=None, *, axis=0):
'''
Reimplemented the full make_smoothing_spline function but made it accept a bias factor towards smoothing or fidelity to data.
Identical to https://github.com/scipy/scipy/blob/main/scipy/interpolate/_bsplines.py#L2210
but multiplies the automatically estimated optimal lambda value by the bias factor.
NEW INPUT:
- bias_factor >=0. > 1 for smoother output, < 1 for more fidelity to data
'''
xp = array_namespace(x, y)
x = np.ascontiguousarray(x, dtype=float)
y = np.ascontiguousarray(y, dtype=float)
if bias_factor <0:
raise ValueError('``bias_factor`` should be positive')
if any(x[1:] - x[:-1] <= 0):
raise ValueError('``x`` should be an ascending array')
if x.ndim != 1 or x.shape[0] != y.shape[axis]:
raise ValueError(f'``x`` should be 1D and {x.shape = } == {y.shape = }')
if w is None:
w = np.ones(len(x))
else:
w = np.ascontiguousarray(w)
if any(w <= 0):
raise ValueError('Invalid vector of weights')
t = np.r_[[x[0]] * 3, x, [x[-1]] * 3]
n = x.shape[0]
if n <= 4:
raise ValueError('``x`` and ``y`` length must be at least 5')
axis = normalize_axis_index(axis, y.ndim)
y = np.moveaxis(y, axis, 0)
y_shape1 = y.shape[1:]
if y_shape1 != ():
y = y.reshape((n, -1))
# create design matrix in the B-spline basis
X_bspl = BSpline.design_matrix(x, t, 3)
X = np.zeros((5, n))
for i in range(1, 4):
X[i, 2: -2] = X_bspl[i: i - 4, 3: -3][np.diag_indices(n - 4)]
X[1, 1] = X_bspl[0, 0]
X[2, :2] = ((x[2] + x[1] - 2 * x[0]) * X_bspl[0, 0],
X_bspl[1, 1] + X_bspl[1, 2])
X[3, :2] = ((x[2] - x[0]) * X_bspl[1, 1], X_bspl[2, 2])
X[1, -2:] = (X_bspl[-3, -3], (x[-1] - x[-3]) * X_bspl[-2, -2])
X[2, -2:] = (X_bspl[-2, -3] + X_bspl[-2, -2],
(2 * x[-1] - x[-2] - x[-3]) * X_bspl[-1, -1])
X[3, -2] = X_bspl[-1, -1]
wE = np.zeros((5, n))
wE[2:, 0] = _coeff_of_divided_diff(x[:3]) / w[:3]
wE[1:, 1] = _coeff_of_divided_diff(x[:4]) / w[:4]
for j in range(2, n - 2):
wE[:, j] = (x[j+2] - x[j-2]) * _coeff_of_divided_diff(x[j-2:j+3])\
/ w[j-2: j+3]
wE[:-1, -2] = -_coeff_of_divided_diff(x[-4:]) / w[-4:]
wE[:-2, -1] = _coeff_of_divided_diff(x[-3:]) / w[-3:]
wE *= 6
# MULTIPLY LAM BY BIAS FACTOR
if lam is None:
lam = _compute_optimal_gcv_parameter(X, wE, y, w)
lam *= bias_factor
elif lam < 0.:
raise ValueError('Regularization parameter should be non-negative')
# solve the initial problem in the basis of natural splines
if np.ndim(lam) == 0:
c = solve_banded((2, 2), X + lam * wE, y)
elif np.ndim(lam) == 1:
# XXX: solve_banded does not suppport batched `ab` matrices; loop manually
c = np.empty((n, lam.shape[0]))
for i in range(lam.shape[0]):
c[:, i] = solve_banded((2, 2), X + lam[i] * wE, y[:, i])
else:
# this should not happen, ever
raise RuntimeError("Internal error, please report it to SciPy developers.")
c = c.reshape((c.shape[0], *y_shape1))
# hack: these are c[0], c[1] etc, shape-compatible with np.r_ below
c0, c1 = c[0:1, ...], c[1:2, ...] # c[0], c[1]
cm0, cm1 = c[-1:-2:-1, ...], c[-2:-3:-1, ...] # c[-1], c[-2]
c_ = np.r_[c0 * (t[5] + t[4] - 2 * t[3]) + c1,
c0 * (t[5] - t[3]) + c1,
c[1: -1, ...],
cm0 * (t[-4] - t[-6]) + cm1,
cm0 * (2 * t[-4] - t[-5] - t[-6]) + cm1]
t, c_ = xp.asarray(t), xp.asarray(c_)
return BSpline.construct_fast(t, c_, 3, axis=axis)
This uses some functions that are part of Scipy's private API. It was tested successfully on scipy v 1.16.1.