[SOLVED] Access lam from make_smoothing_spline (scipy interpolate)

Access lam from make_smoothing_spline (scipy interpolate)

I would like to run scipy.interpolate.make_smoothing_spline() but adjust the automatically determined lam to make it more biased towards smoothing than preserving data, or conversely.

Is there any way, including by using private classes, to access the automatically estimated lam to be able to multiply it by a factor? I really want to use the GCV criterion (unless you prove me I'm on the wrong track).

Solution

I ended up reimplementing the make_smoothing_spline function, simply adding an optional bias_factor argument.

If bias_factor=1: nothing changes
If bias_factor<1: the optimal smoothing parameters are biased towards more fidelity to data
If bias_factor>1: the optimal smoothing parameters are biased towards more smoothing

from scipy.interpolate._bsplines import _coeff_of_divided_diff, _compute_optimal_gcv_parameter
from scipy._lib._array_api import array_namespace
from scipy.interpolate import BSpline
from scipy.linalg import solve_banded
from numpy.core.multiarray import normalize_axis_index

def make_smoothing_spline_with_bias(x, y, bias_factor=1.0, w=None, lam=None, *, axis=0):
    '''
    Reimplemented the full make_smoothing_spline function but made it accept a bias factor towards smoothing or fidelity to data.
    Identical to https://github.com/scipy/scipy/blob/main/scipy/interpolate/_bsplines.py#L2210
    but multiplies the automatically estimated optimal lambda value by the bias factor.

    NEW INPUT:
    - bias_factor >=0. > 1 for smoother output, < 1 for more fidelity to data
    '''

    xp = array_namespace(x, y)

    x = np.ascontiguousarray(x, dtype=float)
    y = np.ascontiguousarray(y, dtype=float)

    if bias_factor <0:
        raise ValueError('``bias_factor`` should be positive')

    if any(x[1:] - x[:-1] <= 0):
        raise ValueError('``x`` should be an ascending array')

    if x.ndim != 1 or x.shape[0] != y.shape[axis]:
        raise ValueError(f'``x`` should be 1D and {x.shape = } == {y.shape = }')

    if w is None:
        w = np.ones(len(x))
    else:
        w = np.ascontiguousarray(w)
        if any(w <= 0):
            raise ValueError('Invalid vector of weights')

    t = np.r_[[x[0]] * 3, x, [x[-1]] * 3]
    n = x.shape[0]

    if n <= 4:
        raise ValueError('``x`` and ``y`` length must be at least 5')

    axis = normalize_axis_index(axis, y.ndim)
    y = np.moveaxis(y, axis, 0)
    y_shape1 = y.shape[1:]
    if y_shape1 != ():
        y = y.reshape((n, -1))

    # create design matrix in the B-spline basis
    X_bspl = BSpline.design_matrix(x, t, 3)
    X = np.zeros((5, n))
    for i in range(1, 4):
        X[i, 2: -2] = X_bspl[i: i - 4, 3: -3][np.diag_indices(n - 4)]

    X[1, 1] = X_bspl[0, 0]
    X[2, :2] = ((x[2] + x[1] - 2 * x[0]) * X_bspl[0, 0],
                X_bspl[1, 1] + X_bspl[1, 2])
    X[3, :2] = ((x[2] - x[0]) * X_bspl[1, 1], X_bspl[2, 2])
    X[1, -2:] = (X_bspl[-3, -3], (x[-1] - x[-3]) * X_bspl[-2, -2])
    X[2, -2:] = (X_bspl[-2, -3] + X_bspl[-2, -2],
                 (2 * x[-1] - x[-2] - x[-3]) * X_bspl[-1, -1])
    X[3, -2] = X_bspl[-1, -1]

    wE = np.zeros((5, n))
    wE[2:, 0] = _coeff_of_divided_diff(x[:3]) / w[:3]
    wE[1:, 1] = _coeff_of_divided_diff(x[:4]) / w[:4]
    for j in range(2, n - 2):
        wE[:, j] = (x[j+2] - x[j-2]) * _coeff_of_divided_diff(x[j-2:j+3])\
                   / w[j-2: j+3]
    wE[:-1, -2] = -_coeff_of_divided_diff(x[-4:]) / w[-4:]
    wE[:-2, -1] = _coeff_of_divided_diff(x[-3:]) / w[-3:]
    wE *= 6

    # MULTIPLY LAM BY BIAS FACTOR
    if lam is None:
        lam = _compute_optimal_gcv_parameter(X, wE, y, w)
        lam *= bias_factor
    elif lam < 0.:
        raise ValueError('Regularization parameter should be non-negative')

    # solve the initial problem in the basis of natural splines
    if np.ndim(lam) == 0:
        c = solve_banded((2, 2), X + lam * wE, y)
    elif np.ndim(lam) == 1:
        # XXX: solve_banded does not suppport batched `ab` matrices; loop manually
        c = np.empty((n, lam.shape[0]))
        for i in range(lam.shape[0]):
            c[:, i] = solve_banded((2, 2), X + lam[i] * wE, y[:, i])
    else:
        # this should not happen, ever
        raise RuntimeError("Internal error, please report it to SciPy developers.")
    c = c.reshape((c.shape[0], *y_shape1))

    # hack: these are c[0], c[1] etc, shape-compatible with np.r_ below
    c0, c1 = c[0:1, ...], c[1:2, ...]     # c[0], c[1]
    cm0, cm1 = c[-1:-2:-1, ...], c[-2:-3:-1, ...]    # c[-1], c[-2]

    c_ = np.r_[c0 * (t[5] + t[4] - 2 * t[3]) + c1,
               c0 * (t[5] - t[3]) + c1,
               c[1: -1, ...],
               cm0 * (t[-4] - t[-6]) + cm1,
               cm0 * (2 * t[-4] - t[-5] - t[-6]) + cm1]

    t, c_ = xp.asarray(t), xp.asarray(c_)
    return BSpline.construct_fast(t, c_, 3, axis=axis)

This uses some functions that are part of Scipy's private API. It was tested successfully on scipy v 1.16.1.