I have an existing NN model (a Keras Sequential model) evaluated with a train/test split. I need to add cross-validation to my workflow; after implementing cross-validation, I get the following error.
TypeError: Cannot clone object '<tensorflow.python.keras.engine.sequential.Sequential object at 0x000001B5D2100108>' (type <class 'tensorflow.python.keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' methods.
The code for the model is shown below. The error appears once I add cross-validation to the existing train/test-split code, which was working before.
Dataset Splitting
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_val_predict # For Cross validation I have added this
# Hold out 25% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=101,
)
from sklearn.model_selection import cross_val_score, cross_val_predict # For Cross validation I have added this
from sklearn import metrics # For Cross validation, I have added this
Scaling Data
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training split only, then apply the same transform to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
Creating the Model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Dropout
Training the Model
from tensorflow.keras.layers import Dropout
# Binary classifier: two ReLU hidden layers (70 and 15 units), each followed by
# Dropout(0.7), and a single sigmoid output unit.
model = Sequential([
    Dense(units=70, activation='relu'),
    Dropout(0.7),
    Dense(units=15, activation='relu'),
    Dropout(0.7),
    Dense(units=1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam')

from tensorflow.keras.callbacks import EarlyStopping

# Monitor the validation loss (lower is better) with a patience of 25 epochs.
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)

# Train for up to 600 epochs, validating on the held-out split after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=600,
    validation_data=(X_test, y_test),
    verbose=1,
    callbacks=[early_stop],
)
This is where adding the cross-validation predictor gives an error
# This is the failing call: cross_val_predict clones the estimator it receives, and a bare
# Keras Sequential does not implement get_params(), which produces the TypeError quoted in this post.
predictions = cross_val_predict(model, X_test, y_test, cv=3) # for cross validation ** (model, df, y, cv=3)
# NOTE(review): `pd` is not imported in the snippets shown; presumably `import pandas as pd` ran earlier.
# Collect the per-epoch training history of the fitted model into a DataFrame and plot it.
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()
The complete Error
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
796 try:
--> 797 tasks = self._ready_batches.get(block=False)
798 except queue.Empty:
~\anaconda3\lib\queue.py in get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-32-2b7d023d5ca4> in <module>
----> 1 predictions = cross_val_predict(model, X_test, y_test, cv=3) # for cross validation ** (model, df, y, cv=3)
~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
753 prediction_blocks = parallel(delayed(_fit_and_predict)(
754 clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 755 for train, test in cv.split(X, y, groups))
756
757 # Concatenate the predictions
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
806 big_batch_size = batch_size * n_jobs
807
--> 808 islice = list(itertools.islice(iterator, big_batch_size))
809 if len(islice) == 0:
810 return False
~\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in <genexpr>(.0)
753 prediction_blocks = parallel(delayed(_fit_and_predict)(
754 clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 755 for train, test in cv.split(X, y, groups))
756
757 # Concatenate the predictions
~\anaconda3\lib\site-packages\sklearn\base.py in clone(estimator, safe)
65 "it does not seem to be a scikit-learn estimator "
66 "as it does not implement a 'get_params' methods."
---> 67 % (repr(estimator), type(estimator)))
68 klass = estimator.__class__
69 new_object_params = estimator.get_params(deep=False)
The Type Error is
TypeError: Cannot clone object '<tensorflow.python.keras.engine.sequential.Sequential object at 0x000001577B632148>' (type <class 'tensorflow.python.keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' methods.
The point is that your model is not a sklearn estimator, as the error suggests (in particular, it lacks the .get_params()
method), while cross_val_predict()
requires a sklearn estimator to be passed to it.
A way to get around this is to wrap your Keras model in an object that mimics a regular sklearn estimator via a scikeras.wrappers.KerasClassifier
object. Once you've defined your KerasClassifier
you can use it as a classical sklearn classifier and therefore pass it to cross_val_predict()
.
Here's a working example that starts from your snippet:
!pip install scikeras
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.datasets import make_classification
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# Synthetic binary-classification data (10,000 samples, 70 features) so the example is self-contained.
X, y = make_classification(n_samples=10000, n_features=70, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=101
)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
def build_model():
    """Return a new, compiled, *untrained* copy of the network.

    Cross-validation must start every fold from fresh weights, so the
    architecture lives in a builder function instead of a single shared
    model instance.
    """
    net = Sequential()
    net.add(Dense(units=70, activation='relu'))
    net.add(Dropout(0.7))
    net.add(Dense(units=15, activation='relu'))
    net.add(Dropout(0.7))
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam')
    return net


# Original hold-out workflow, unchanged: one model trained on the training
# split, with the validation loss on the held-out split driving early stopping.
model = build_model()
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
model.fit(
    x=X_train,
    y=y_train,
    epochs=600,
    validation_data=(X_test, y_test),
    verbose=1,
    callbacks=[early_stop],
)

# Define the KerasClassifier and use it in cross_val_predict.
# Pass the builder function, NOT the fitted `model` above: that instance has
# already been trained on all of X_train, so reusing it would likely carry its
# learned weights into every fold and make the out-of-fold predictions
# optimistic. With a callable, a fresh network is built for each fold.
# The optimizer and loss are set by the compile() call inside build_model().
keras_clf = KerasClassifier(model=build_model, epochs=100, verbose=0)
predictions = cross_val_predict(keras_clf, X_train, y_train, cv=3)