python time-series statsforecast

How can I use features in statsforecast


How can I use features in statsforecast (e.g. moving average, lags, user defined function)?

# Baseline from the question: build the forecaster straight from the training
# frame, with no extra feature columns.
fcst = StatsForecast(
    m4_daily_train,
    # (model, argument) pair; the 7 is presumably a weekly season length for
    # daily data — TODO confirm against the statsforecast version in use.
    models = [(auto_arima,7)],
    freq = 'D',
    # Number of jobs: count of distinct index values in the training frame,
    # capped at cpu_count().
    n_jobs = min(len(m4_daily_train.index.unique()),cpu_count())
)

Or is it possible to create the features on my own in a previous pandas step and then use the full feature table in the fitting, like this:

# Hand-made features computed in pandas before fitting.
# shift(1) takes the previous row's y; it is not grouped per series here.
df['lag1'] = df['y'].shift(1)
# Day of month extracted from the timestamp column.
df['day'] = df['timestamp'].dt.day

# Same forecaster setup as before, but fed the feature-enriched frame.
fcst = StatsForecast(
    df,
    models = [(auto_arima,7)],
    freq = 'D',
    # NOTE(review): n_jobs is still derived from m4_daily_train, not df —
    # confirm this is intended.
    n_jobs = min(len(m4_daily_train.index.unique()),cpu_count())
)

Solution

  • You can use exogenous variables in statsforecast by passing a training dataset that includes unique_id, ds, y, and the exogenous variables, and then, in the forecast step, a test dataset that includes unique_id, ds, and the future values of the exogenous variables. For more information about exogenous regressors, see the official documentation.

    from datasetsforecast.m5 import M5
    from statsforecast import StatsForecast
    from statsforecast.models import AutoARIMA

    # Load the M5 data: Y_df holds the target series, X_df the exogenous
    # variables; any further return values are ignored.
    Y_df, X_df, *_ = M5.load('./data')

    # Work on a single series; the id is named once so both filters agree.
    series_id = 'FOODS_3_586_CA_3'
    Y_ts = Y_df[Y_df['unique_id'] == series_id].reset_index(drop=True)
    X_ts = X_df[X_df['unique_id'] == series_id].reset_index(drop=True)

    # Keep only the id/date keys and the two regressors used by the model.
    # .copy() gives an independent frame, so the column assignment below does
    # not raise pandas' SettingWithCopyWarning.
    X_ts = X_ts[['unique_id', 'ds', 'sell_price', 'snap_CA']].copy()
    X_ts['unique_id'] = X_ts['unique_id'].astype(str)

    # Split on the selected series' own dates, sorted explicitly, so the
    # "last 28 dates" do not depend on row order or on other series in Y_df.
    dates = Y_ts['ds'].sort_values().unique()
    dtrain = dates[:-28]
    dtest = dates[-28:]

    Y_train = Y_ts.query('ds in @dtrain')
    Y_test = Y_ts.query('ds in @dtest')  # held-out actuals, kept for evaluation

    X_train = X_ts.query('ds in @dtrain')
    X_test = X_ts.query('ds in @dtest')

    # Training frame: unique_id, ds, y plus the exogenous regressors.
    # A left join keeps exactly the Y_train rows.
    train = Y_train.merge(X_train, how='left', on=['unique_id', 'ds'])

    models = [AutoARIMA(season_length=7)]
    sf = StatsForecast(
        models=models,
        freq='D',
        n_jobs=-1,
    )

    # X_df carries the future regressor values for the 28-step horizon;
    # level=[95] requests 95% prediction intervals.
    fcst = sf.forecast(df=train, h=28, X_df=X_test, level=[95])
    fcst = fcst.reset_index()
    fcst.head()
    

    dataset