I am trying to implement an ensemble of four Darts models, each of which has fit and predict methods. My goal is as follows:
Each of these models works fine on the dataset individually, but combined in this ensemble they throw an error. I have created a small, minimal reproducible example below, very similar to my real code, which throws the same error.
# Minimal reproduction: build a 39-row monthly toy dataset, wrap four
# forecasting models in a RegressionEnsembleModel, fit it, and request a
# one-step forecast.

# Four columns of 39 consecutive integers each; col4 becomes the target.
col1 = list(np.arange(1,40))
col2 = list(np.arange(41,80))
col3 = list(np.arange(81,120))
col4 = list(np.arange(121,160))
# Month-start dates from 2019-08 through 2022-10, rendered as "%Y-%b" strings.
month = pd.date_range('2019-08-01','2022-10-01',freq='1MS').strftime("%Y-%b").tolist()
dt = {'col1': col1,'col2':col2,'col3':col3,'target':col4,'month':month}
df = pd.DataFrame(dt)
# Parse the strings back to datetimes and move them into the index so that
# TimeSeries.from_dataframe can use the index as the time axis.
df['month'] = pd.to_datetime(df['month'])
df.index = df['month']
df = df.drop(['month'],axis = 1)
# Past covariates: every column except col3, i.e. col1, col2 and 'target'.
# NOTE(review): the target column is therefore also passed as a covariate —
# confirm this is intended.
X_train_t = TimeSeries.from_dataframe(df,value_cols=list(df.columns.drop(["col3"])),freq="1MS")
# Target series: the 'target' column only.
y_train_t = TimeSeries.from_dataframe(df,freq = '1MS',value_cols = 'target')
# Initialize the base forecasting models, the ensembling regressor, and fit.
# torch_metrics is not used anywhere in the lines shown here.
torch_metrics = SymmetricMeanAbsolutePercentageError()
# The three tree-based models each use the last three target lags and the
# last three past-covariate lags, and predict one step ahead.
model1 = XGBModel(lags= [-1,-2,-3], lags_past_covariates=[-1,-2,-3],random_state = 42,output_chunk_length=1)
model2 = LightGBMModel(lags= [-1,-2,-3], lags_past_covariates=[-1,-2,-3],random_state = 42,output_chunk_length=1)
model3 = CatBoostModel(lags= [-1,-2,-3], lags_past_covariates=[-1,-2,-3], lags_future_covariates=None, output_chunk_length=1,random_state = 42)
# GRU-based block RNN with a 12-step input window and a one-step output.
model4 = BlockRNNModel(input_chunk_length=12,model = 'GRU',dropout=0.2,random_state = 42,output_chunk_length = 1)
forecasting_models = [model1,model2,model3,model4]
# Regressor handed to the ensemble as its regression_model; configured with
# no target or past-covariate lags, only lag 0 of its future covariates.
regression_model =RandomForest(lags=None, lags_past_covariates=None, lags_future_covariates=[0], output_chunk_length=1, add_encoders=None, n_estimators=100, max_depth=None)
# NOTE(review): regression_train_n_points=38 on a 39-point series leaves a
# single point for the forecasting models, which cannot satisfy lags
# (-1, -2, -3); this is what makes the fit() call below fail.
model_ensemble = RegressionEnsembleModel(forecasting_models = forecasting_models, regression_train_n_points =38,regression_model = regression_model)
# This call raises the ValueError shown in the traceback.
history = model_ensemble.fit(y_train_t,past_covariates = X_train_t)
# Not reached while fit() fails: one-step-ahead forecast from the ensemble.
pred = model_ensemble.predict(n=1, series = y_train_t,past_covariates = X_train_t)
The full error traceback is as follows:
ValueError Traceback (most recent call last)
/tmp/ipykernel_27/3395707719.py in <module>
12 regression_model =RandomForest(lags=None, lags_past_covariates=None, lags_future_covariates=[0], output_chunk_length=1, add_encoders=None, n_estimators=100, max_depth=None)
13 model_ensemble = RegressionEnsembleModel(forecasting_models = forecasting_models, regression_train_n_points =38,regression_model = regression_model)
---> 14 history = model_ensemble.fit(y_train_t,past_covariates = X_train_t)
15
16 pred = model_ensemble.predict(n=1, series = y_train_t,past_covariates = X_train_t)
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/regression_ensemble_model.py in fit(self, series, past_covariates, future_covariates)
119 series=forecast_training,
120 past_covariates=past_covariates,
--> 121 future_covariates=future_covariates,
122 )
123
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/forecasting_model.py in _fit_wrapper(self, series, past_covariates, future_covariates)
1820 future_covariates=future_covariates
1821 if self.supports_future_covariates
-> 1822 else None,
1823 )
1824
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/xgboost.py in fit(self, series, past_covariates, future_covariates, val_series, val_past_covariates, val_future_covariates, max_samples_per_ts, **kwargs)
219 future_covariates=future_covariates,
220 max_samples_per_ts=max_samples_per_ts,
--> 221 **kwargs,
222 )
223
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/regression_model.py in fit(self, series, past_covariates, future_covariates, max_samples_per_ts, n_jobs_multioutput_wrapper, **kwargs)
481
482 self._fit_model(
--> 483 series, past_covariates, future_covariates, max_samples_per_ts, **kwargs
484 )
485
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/regression_model.py in _fit_model(self, target_series, past_covariates, future_covariates, max_samples_per_ts, **kwargs)
363
364 training_samples, training_labels = self._create_lagged_data(
--> 365 target_series, past_covariates, future_covariates, max_samples_per_ts
366 )
367
/opt/conda/lib/python3.7/site-packages/darts/models/forecasting/regression_model.py in _create_lagged_data(self, target_series, past_covariates, future_covariates, max_samples_per_ts)
334 lags_future_covariates=lags_future_covariates,
335 max_samples_per_ts=max_samples_per_ts,
--> 336 multi_models=self.multi_models,
337 )
338
/opt/conda/lib/python3.7/site-packages/darts/utils/data/tabularization.py in _create_lagged_data(target_series, output_chunk_length, past_covariates, future_covariates, lags, lags_past_covariates, lags_future_covariates, max_samples_per_ts, is_training, multi_models)
150 "Unable to build any training samples of the target series "
151 + (f"at index {idx} " if len(target_series) > 1 else "")
--> 152 + "and the corresponding covariate series; "
153 "There is no time step for which all required lags are available and are not NaN values.",
154 )
/opt/conda/lib/python3.7/site-packages/darts/logging.py in raise_if(condition, message, logger)
102 if `condition` is satisfied
103 """
--> 104 raise_if_not(not condition, message, logger)
105
106
/opt/conda/lib/python3.7/site-packages/darts/logging.py in raise_if_not(condition, message, logger)
76 if not condition:
77 logger.error("ValueError: " + message)
---> 78 raise ValueError(message)
79
80
ValueError: Unable to build any training samples of the target series and the corresponding covariate series; There is no time step for which all required lags are available and are not NaN values.
Your problem is in the following line:
model_ensemble = RegressionEnsembleModel(forecasting_models=forecasting_models, regression_train_n_points=38, regression_model=regression_model)
Setting regression_train_n_points to 38 on a dataset of 39 points leaves a single point for training your list of time-series models, which is not enough to build the lag values (-1, -2, -3).
Maybe you misinterpreted the meaning of the regression_train_n_points parameter: it is the number of points used to train the ensembling regression model (the RandomForest), not the number of points used by the list of time-series forecasting models.