Based on the accepted answer of this question, I am trying to implement a polynomial regressor using RANSAC to fit a polynomial of order 5.
Let the data to fit be
x = [0.02965717 0.10966089 0.17002236 0.19015372 0.27044443 0.33011883
0.40844298 0.4659353 0.54051902 0.61236153 0.68116213 0.74673223
0.82403296 0.88216575 0.96342659]
y = [3.96001134e-03 6.81505094e-04 0.00000000e+00 1.13660854e-04
2.26741003e-03 5.64587625e-03 1.24338500e-02 1.91707798e-02
3.02265331e-02 4.34929443e-02 5.87863985e-02 7.59236901e-02
9.96780928e-02 1.20366687e-01 1.53936744e-01]
and the estimator used
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import mean_squared_error
class PolynomialRegression(object):
    """Polynomial least-squares estimator built on ``np.polyfit``.

    NOTE: ``get_params`` below reports only ``coeffs`` and omits ``degree``.
    An instance rebuilt from those parameters therefore falls back to the
    constructor default ``degree=3``, which is consistent with a second
    "Degree: 3" line being printed when the estimator is re-created.
    """

    def __init__(self, degree=3, coeffs=None):
        # Printed on every construction so that re-instantiation is visible.
        print(f"Degree: {degree}")
        self.degree = degree
        self.coeffs = coeffs

    def fit(self, X, y):
        # X is flattened to 1-D before being handed to np.polyfit; the
        # resulting coefficients are stored and nothing is returned.
        self.coeffs = np.polyfit(X.ravel(), y, self.degree)

    def get_params(self, deep=False):
        # Returns the fitted coefficients only; ``degree`` is not included and
        # ``deep`` is ignored.
        return {'coeffs': self.coeffs}

    def set_params(self, coeffs=None, random_state=None):
        # Only ``coeffs`` is stored; ``random_state`` is accepted but unused.
        self.coeffs = coeffs

    def predict(self, X):
        # Evaluate the polynomial defined by the stored coefficients on the
        # flattened input.
        poly_eqn = np.poly1d(self.coeffs)
        y_hat = poly_eqn(X.ravel())
        return y_hat

    def score(self, X, y):
        # Mean squared error between y and the predictions for X.
        return mean_squared_error(y, self.predict(X))
The fit is done in the following snippet:
import numpy as np
# Wrap the custom polynomial estimator (requested degree: 5) in RANSAC.
# NOTE(review): min_samples=2 is fewer than the degree + 1 = 6 points needed
# to determine a degree-5 polynomial uniquely — confirm this is intended.
ransac = RANSACRegressor(base_estimator=PolynomialRegression(degree=5),
                         residual_threshold=np.std(y),
                         random_state=0,
                         min_samples=2)
# x is 1-D, so a second axis is added to pass it as a single-column 2-D array.
ransac.fit(np.expand_dims(x, axis=1), y)
# Coefficients stored by the estimator that RANSAC kept after fitting.
w = np.array(ransac.estimator_.coeffs)
print(w)
As you can see, I am passing a PolynomialRegression of degree=5 to RANSACRegressor, and I expect w to have 6 components. Nevertheless, when executing the code, the degree of the PolynomialRegression is changed to 3 at some point, and the fit is done with that default value, not the one I constructed it with.
Output:
Degree: 5
Degree: 3
[ 0.07331904 0.14501533 -0.05369491 0.00492718]
How can I properly define the degree of the fit to be done?
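The problem is in the get_params method: it is expected to return the hyperparameters of the estimator, i.e. it should return the degree of the polynomial regression, not the estimated regression coefficients. RANSACRegressor clones the estimator by calling its constructor with whatever get_params returns, so when degree is missing from that dictionary the clone is built with the default degree=3. See the documentation.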
import warnings
import numpy as np
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
warnings.filterwarnings('ignore')
class PolynomialRegression(object):
    """Least-squares polynomial estimator with a scikit-learn style interface.

    The only hyperparameter is ``degree``. ``get_params`` and ``set_params``
    expose exactly that hyperparameter, so an instance re-created from
    ``get_params()`` keeps the requested degree.

    Parameters
    ----------
    degree : int, default=3
        Degree of the polynomial passed to ``np.polyfit``.

    Attributes
    ----------
    coeffs : ndarray
        Polynomial coefficients, highest power first (``np.polyfit`` order).
        Only available after ``fit`` has been called.
    """

    def __init__(self, degree=3):
        # Printed on every construction so that re-instantiation is visible.
        print(f"Degree: {degree}")
        self.degree = degree

    def fit(self, X, y):
        """Fit the polynomial to ``(X, y)`` and return ``self``.

        ``X`` is flattened to 1-D before fitting. Returning ``self`` follows
        the scikit-learn estimator convention and allows call chaining.
        """
        self.coeffs = np.polyfit(X.ravel(), y, self.degree)
        return self

    def get_params(self, deep=False):
        """Return the hyperparameters as a dict; ``deep`` is ignored."""
        return {'degree': self.degree}

    def set_params(self, **parameters):
        """Set each keyword argument as an attribute and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def predict(self, X):
        """Evaluate the fitted polynomial on the flattened ``X``."""
        poly_eqn = np.poly1d(self.coeffs)
        y_hat = poly_eqn(X.ravel())
        return y_hat

    def score(self, X, y):
        """Return the mean squared error of the predictions for ``X``.

        NOTE(review): this is an error metric (lower is better), whereas
        scikit-learn scores are conventionally higher-is-better — verify how
        the calling meta-estimator ranks candidate models.
        """
        return mean_squared_error(y, self.predict(X))
# Synthetic single-feature regression data with a fixed seed.
x, y = make_regression(n_features=1, random_state=42)
# NOTE(review): min_samples=2 is fewer than the degree + 1 = 6 points needed
# to determine a degree-5 polynomial uniquely — confirm this is intended.
ransac = RANSACRegressor(
    base_estimator=PolynomialRegression(degree=5),
    residual_threshold=np.std(y),
    random_state=0,
    min_samples=2
)
ransac.fit(x, y)
# Six coefficients are expected for a degree-5 polynomial.
print(ransac.estimator_.coeffs)
# Output:
# Degree: 5
# Degree: 5
# [ 2.15861169e-14 1.51841316e-14 -5.09828681e-14 2.71301269e-15
# 4.17411003e+01 -5.11272743e-15]