when I run my individual models with different training and test data my model works fine. I wanted to run a for loop and now I am getting the error not sure why.
I have created several time splits to check how the model is performing with different data breakdowns.
# dataframe opertations - pandas
import pandas as pd
# plotting data - matplotlib
from matplotlib import pyplot as plt
# time series - statsmodels
# Seasonality decomposition
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import seasonal_decompose
# holt winters
# single exponential smoothing
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
# double and triple exponential smoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from numpy import sqrt
from sklearn.metrics import mean_squared_error
df = pd.read_csv('/content/hw-cv-imputed.csv',index_col='date', parse_dates=True)
df.index.freq = 'W-FRI'
# finding shape of the dataframe
print(df.shape)
# having a look at the data
print(df.head())
# plotting the original data
df[['visits']].plot(title='visit Data')
#Splitting according to the above description
train1, test1 = df.iloc[:52, 0], df.iloc[52:62, 0]
train2, test2 = df.iloc[:56, 0], df.iloc[56:66, 0]
train3, test3 = df.iloc[:60, 0], df.iloc[60:70, 0]
train4, test4 = df.iloc[:65, 0], df.iloc[65:75, 0]
train5, test5 = df.iloc[:69, 0], df.iloc[69:79, 0]
train6, test6 = df.iloc[:73, 0], df.iloc[73:83, 0]
train7, test7 = df.iloc[:78, 0], df.iloc[78:88, 0]
train8, test8 = df.iloc[:82, 0], df.iloc[82:90, 0]
total_model_parameters = pd.DataFrame(columns = ['Total','Parameters'])
# Split into train and test set
#train_df = train1
#test_df = test1
from sklearn.model_selection import ParameterGrid
for train_df ,test_df in [('train1','test1'),('train2','test2'),('train3','test3'),('train4','test4'),('train5','test5'),('train6','test6'),('train7','test7')]:
params_grid = {'trend':('mul','add'),
'seasonal':('mul','add'),
'seasonal_periods': [10,12]}
grid = ParameterGrid(params_grid)
cnt = 0
for p in grid:
cnt = cnt+1
print('Total Possible Models',cnt)
model_parameters = pd.DataFrame(columns = ['Total','Parameters'])
for p in grid:
test = pd.DataFrame()
print(p)
**fitted_model = ExponentialSmoothing(train_df,trend=p['trend'],seasonal=p['seasonal'],seasonal_periods=p['seasonal_periods']).fit()**
test_predictions = fitted_model.forecast(10)
df_new = pd.concat((test_df,test_predictions.rename('predicted_visits'),(((test_df-test_predictions)/test_df)*100).rename('error')),axis=1)
def accuracy(row):
if abs(row['error']) < 20:
return 1
return 0
df_new['accuracy'] = df_new.apply(lambda row: accuracy(row), axis=1)
Total = df_new['accuracy'].sum()
print('Accuracy------------------------------------',Total)
model_parameters = model_parameters.append({'Total':Total,'Parameters':p},ignore_index=True)
parameters = model_parameters.sort_values(by=['Total'],ascending=False)
parameters = parameters.reset_index(drop=True)
parameters.head(9)
Parameters_1 = pd.DataFrame(parameters)
Parameters_1
parameters['Parameters'][0]
total_model_parameters = total_model_parameters.append(parameters)
total_model_parameters
The error is
for the line - *fitted_model = ExponentialSmoothing(train_df,trend=p['trend'],seasonal=p['seasonal'],seasonal_periods=p['seasonal_periods']).fit()*
ValueError: unrecognized data structures: <class 'str'> / <class 'NoneType'>
Can someone help, please? :)
p.s. The data is as follows
date visits
1/22/2021 7352070
1/29/2021 7063725
2/5/2021 9385950
2/12/2021 7851435
2/19/2021 9509640
2/26/2021 9919170
3/5/2021 9682125
3/12/2021 9597075
3/19/2021 8189835
3/26/2021 7487385
4/2/2021 8863965
4/9/2021 8856165
4/16/2021 8619345
4/23/2021 4499670
4/30/2021 3642705
5/7/2021 3105690
5/14/2021 3096330
5/21/2021 3240360
5/28/2021 5152410
6/4/2021 6471915
6/11/2021 4401030
6/18/2021 3197775
6/25/2021 2606340
7/2/2021 3248460
7/9/2021 4996425
7/16/2021 7775085
7/23/2021 9690795
7/30/2021 10041555
8/6/2021 11849055
8/13/2021 14598750
8/20/2021 15339390
8/27/2021 20118720
9/3/2021 12731115
9/10/2021 17456475
9/17/2021 20393850
9/24/2021 20537895
10/1/2021 20800935
10/8/2021 25035450
10/15/2021 22872450
10/22/2021 22790130
10/29/2021 22036965
11/5/2021 26988975
11/12/2021 29194530
11/19/2021 26106000
11/26/2021 29928660
12/3/2021 29254335
12/10/2021 32165430
12/17/2021 27303570
12/24/2021 21453585
12/31/2021 21568815
1/7/2022 21286680
1/14/2022 25589715
1/21/2022 21890130
1/28/2022 20881515
2/4/2022 24185835
2/11/2022 24160590
2/18/2022 20253360
2/25/2022 20450910
3/4/2022 26542320
3/11/2022 25540335
3/18/2022 29602380
3/25/2022 32258340
4/1/2022 24953640
4/8/2022 22872165
4/15/2022 25784490
4/22/2022 25168356
4/29/2022 25405687
5/6/2022 24693295
5/13/2022 26374944
5/20/2022 26192271
5/27/2022 26868125
6/3/2022 27948287
6/10/2022 28320595
6/17/2022 28153788
6/24/2022 27470327
7/1/2022 30520950
7/8/2022 28635750
7/15/2022 26269140
7/22/2022 24236250
7/29/2022 20541675
8/5/2022 21190020
8/12/2022 22389675
8/19/2022 24496455
8/26/2022 27555645
9/2/2022 26324760
9/9/2022 32937450
9/16/2022 36577425
9/23/2022 33522000
9/30/2022 30759780
10/7/2022 30615870
The problem is that you have '
quoted your variable names so that
for train_df ,test_df in [('train1','test1'),...]
shouldn't have the '
s.
You can do away with that line if you're happy to put your pairs of training and test data into a list of tuples like this
import pandas as pd
from sklearn.model_selection import ParameterGrid
from statsmodels.tsa.holtwinters import ExponentialSmoothing
df = pd.read_csv("hw-cv-imputed.csv", index_col="date", parse_dates=True)
df.index.freq = "W-FRI"
# finding shape of the dataframe
print(df.shape)
# having a look at the data
print(df.head())
# plotting the original data
df[["visits"]].plot(title="visit Data")
# Splitting according to the above description
train_and_test = []
train_and_test.append((df.iloc[:52, 0], df.iloc[52:62, 0]))
train_and_test.append((df.iloc[:56, 0], df.iloc[56:66, 0]))
train_and_test.append((df.iloc[:60, 0], df.iloc[60:70, 0]))
train_and_test.append((df.iloc[:65, 0], df.iloc[65:75, 0]))
train_and_test.append((df.iloc[:69, 0], df.iloc[69:79, 0]))
train_and_test.append((df.iloc[:73, 0], df.iloc[73:83, 0]))
train_and_test.append((df.iloc[:78, 0], df.iloc[78:88, 0]))
train_and_test.append((df.iloc[:82, 0], df.iloc[82:90, 0]))
total_model_parameters = pd.DataFrame(columns=["Total", "Parameters"])
for train_df, test_df in train_and_test:
params_grid = {
"trend": ("mul", "add"),
"seasonal": ("mul", "add"),
"seasonal_periods": [10, 12],
}
grid = ParameterGrid(params_grid)
cnt = 0
for p in grid:
cnt = cnt + 1
print("Total Possible Models", cnt)
model_parameters = pd.DataFrame(columns=["Total", "Parameters"])
for p in grid:
...