pythonfilterscipy

Access lam from make_smoothing_spline (scipy interpolate)


I would like to run scipy.interpolate.make_smoothing_spline() but adjust the automatically determined lam to make it more biased towards smoothing than preserving data, or conversely.

Is there any way, including by using private classes, to access the automatically estimated lam to be able to multiply it by a factor? I really want to use the GCV criterion (unless you prove me I'm on the wrong track).


Solution

  • I ended up reimplementing the make_smoothing_spline function, simply adding an optional bias_factor argument.

    from scipy.interpolate._bsplines import _coeff_of_divided_diff, _compute_optimal_gcv_parameter
    from scipy._lib._array_api import array_namespace
    from scipy.interpolate import BSpline
    from scipy.linalg import solve_banded
    from numpy.core.multiarray import normalize_axis_index
    
    def make_smoothing_spline_with_bias(x, y, bias_factor=1.0, w=None, lam=None, *, axis=0):
        '''
        Reimplemented the full make_smoothing_spline function but made it accept a bias factor towards smoothing or fidelity to data.
        Identical to https://github.com/scipy/scipy/blob/main/scipy/interpolate/_bsplines.py#L2210
        but multiplies the automatically estimated optimal lambda value by the bias factor.
    
        NEW INPUT:
        - bias_factor >=0. > 1 for smoother output, < 1 for more fidelity to data
        '''
    
        xp = array_namespace(x, y)
    
        x = np.ascontiguousarray(x, dtype=float)
        y = np.ascontiguousarray(y, dtype=float)
    
        if bias_factor <0:
            raise ValueError('``bias_factor`` should be positive')
    
        if any(x[1:] - x[:-1] <= 0):
            raise ValueError('``x`` should be an ascending array')
    
        if x.ndim != 1 or x.shape[0] != y.shape[axis]:
            raise ValueError(f'``x`` should be 1D and {x.shape = } == {y.shape = }')
    
        if w is None:
            w = np.ones(len(x))
        else:
            w = np.ascontiguousarray(w)
            if any(w <= 0):
                raise ValueError('Invalid vector of weights')
    
        t = np.r_[[x[0]] * 3, x, [x[-1]] * 3]
        n = x.shape[0]
    
        if n <= 4:
            raise ValueError('``x`` and ``y`` length must be at least 5')
    
        axis = normalize_axis_index(axis, y.ndim)
        y = np.moveaxis(y, axis, 0)
        y_shape1 = y.shape[1:]
        if y_shape1 != ():
            y = y.reshape((n, -1))
    
        # create design matrix in the B-spline basis
        X_bspl = BSpline.design_matrix(x, t, 3)
        X = np.zeros((5, n))
        for i in range(1, 4):
            X[i, 2: -2] = X_bspl[i: i - 4, 3: -3][np.diag_indices(n - 4)]
    
        X[1, 1] = X_bspl[0, 0]
        X[2, :2] = ((x[2] + x[1] - 2 * x[0]) * X_bspl[0, 0],
                    X_bspl[1, 1] + X_bspl[1, 2])
        X[3, :2] = ((x[2] - x[0]) * X_bspl[1, 1], X_bspl[2, 2])
        X[1, -2:] = (X_bspl[-3, -3], (x[-1] - x[-3]) * X_bspl[-2, -2])
        X[2, -2:] = (X_bspl[-2, -3] + X_bspl[-2, -2],
                     (2 * x[-1] - x[-2] - x[-3]) * X_bspl[-1, -1])
        X[3, -2] = X_bspl[-1, -1]
    
        wE = np.zeros((5, n))
        wE[2:, 0] = _coeff_of_divided_diff(x[:3]) / w[:3]
        wE[1:, 1] = _coeff_of_divided_diff(x[:4]) / w[:4]
        for j in range(2, n - 2):
            wE[:, j] = (x[j+2] - x[j-2]) * _coeff_of_divided_diff(x[j-2:j+3])\
                       / w[j-2: j+3]
        wE[:-1, -2] = -_coeff_of_divided_diff(x[-4:]) / w[-4:]
        wE[:-2, -1] = _coeff_of_divided_diff(x[-3:]) / w[-3:]
        wE *= 6
    
        # MULTIPLY LAM BY BIAS FACTOR
        if lam is None:
            lam = _compute_optimal_gcv_parameter(X, wE, y, w)
            lam *= bias_factor
        elif lam < 0.:
            raise ValueError('Regularization parameter should be non-negative')
    
        # solve the initial problem in the basis of natural splines
        if np.ndim(lam) == 0:
            c = solve_banded((2, 2), X + lam * wE, y)
        elif np.ndim(lam) == 1:
            # XXX: solve_banded does not suppport batched `ab` matrices; loop manually
            c = np.empty((n, lam.shape[0]))
            for i in range(lam.shape[0]):
                c[:, i] = solve_banded((2, 2), X + lam[i] * wE, y[:, i])
        else:
            # this should not happen, ever
            raise RuntimeError("Internal error, please report it to SciPy developers.")
        c = c.reshape((c.shape[0], *y_shape1))
    
        # hack: these are c[0], c[1] etc, shape-compatible with np.r_ below
        c0, c1 = c[0:1, ...], c[1:2, ...]     # c[0], c[1]
        cm0, cm1 = c[-1:-2:-1, ...], c[-2:-3:-1, ...]    # c[-1], c[-2]
    
        c_ = np.r_[c0 * (t[5] + t[4] - 2 * t[3]) + c1,
                   c0 * (t[5] - t[3]) + c1,
                   c[1: -1, ...],
                   cm0 * (t[-4] - t[-6]) + cm1,
                   cm0 * (2 * t[-4] - t[-5] - t[-6]) + cm1]
    
        t, c_ = xp.asarray(t), xp.asarray(c_)
        return BSpline.construct_fast(t, c_, 3, axis=axis)
    

    This uses some functions that are part of Scipy's private API. It was tested successfully on scipy v 1.16.1.