I have been trying to convert a dictionary into a dataframe but everytime i keep getting ValueError: All arrays must be of the same length. i Have checkde the length of each array and confirmed them to be the same but i am still getting the same error
def metrics_from_pipes(pipes_dict):
for name, pipeline in pipes_dict.items():
pipeline.fit(X_train, y_train)
y_pred_val = pipeline.predict(X_val)
y_pred_train = pipeline.predict(X_train)
train_metrics = {
'model':list(pipes_dict.keys()),
'MAE':train_mae,
'MAPE':train_mape,
'RMSE':train_rmse,
'RSquared':train_rsquared
}
train_metrics_data = pd.DataFrame(train_metrics)
val_metrics = {
'model':list(pipes_dict.keys()),
'MAE':val_mae,
'MAPE':val_mape,
'RMSE':val_rmse,
'RSquared':val_rsquared
}
val_metrics_data = pd.DataFrame(val_metrics,)
#Merging metrics from train and test set
train_val_metrics = train_metrics_data.merge(val_metrics_data,
on = 'Model',
how = 'left',
suffixes = ('_train', '_val'))
# sorting columns
train_val_metrics = train_val_metrics.reindex(columns = ['Model',
'MAE_train',
'MAPE_train',
'RMSE_train',
'RSquared_train',
'MAE_val',
'MAPE_val',
'RMSE_val',
'RSquared_val'])
return train_val_metrics.set_index('Model').transpose()
# get the metrics table
metrics_table = metrics_from_pipes(pipelines)
running this code gives this error
ValueError Traceback (most recent call last)
Cell In[45], line 82
80 return train_val_metrics.set_index('Model').transpose()
81 # get the metrics table
---> 82 metrics_table = metrics_from_pipes(pipelines)
83 #print('Table 1: Base Models Metrics')
84 #metrics_table.style.background_gradient(cmap = Blues)
85 metrics_table
Cell In[45], line 50, in metrics_from_pipes(pipes_dict)
41 # aggregate the performance metric lists into seperate dataframes
42 train_metrics = {
43 'model':list(pipes_dict.keys()),
44 'MAE':train_mae,
(...)
47 'RSquared':train_rsquared
48 }
---> 50 train_metrics_data = pd.DataFrame(train_metrics)
51 val_metrics = {
52 'model':list(pipes_dict.keys()),
53 'MAE':val_mae,
(...)
56 'RSquared':val_rsquared
57 }
59 val_metrics_data = pd.DataFrame(val_metrics,)
ValueError: All arrays must be of the same length
when i checked for the result of the dictionary for both train_metrics and val metrics, i got this
({'model': ['Linear Regression',
'Random Forest Regressor',
'Gradient Boost Regression',
'Extra Tree Regressor'],
'MAE': [829.1023412412194,
288.33455697065233,
712.9637267872279,
0.0010629575741748962],
'MAPE': [1.0302372135902111,
0.20937541440883897,
0.538244903316323,
6.306697580961048e-07],
'RMSE': [1120.5542708017374,
416.48933196590013,
1012.399201767692,
0.05804079289490426],
'RSquared': [0.5598288286601083,
0.9391916010838417,
0.6406981997919169,
0.9999999988190745]},
{'model': ['Linear Regression',
'Random Forest Regressor',
'Gradient Boost Regression',
'Extra Tree Regressor'],
'MAE': [855.9254413559535,
802.5902302175274,
772.3140648475379,
839.9018341377154],
'MAPE': [1.0395487579496652,
0.5607987708065988,
0.5438627253681279,
0.5852285872937784],
'RMSE': [1148.6549900167981,
1158.8411708570625,
1109.6145558003204,
1223.23337689915],
'RSquared': [0.5876710102285392,
0.5803255834810521,
0.6152231339508221,
0.5323905190373128]})
Print the contents of train_metrics, just before line 50;
train_metrics_data = pd.DataFrame(train_metrics)
Then you will see what the dict looked like just before it crashes. I ran a part of the faulty code, and it seems to work just fine.
Using python 3.10.14 :
import pandas as pd
train_metrics = {'model': ['Linear Regression',
'Random Forest Regressor',
'Gradient Boost Regression',
'Extra Tree Regressor'],
'MAE': [829.1023412412194,
288.33455697065233,
712.9637267872279,
0.0010629575741748962],
'MAPE': [1.0302372135902111,
0.20937541440883897,
0.538244903316323,
6.306697580961048e-07],
'RMSE': [1120.5542708017374,
416.48933196590013,
1012.399201767692,
0.05804079289490426],
'RSquared': [0.5598288286601083,
0.9391916010838417,
0.6406981997919169,
0.9999999988190745]}
train_metrics_data = pd.DataFrame(train_metrics)
print(train_metrics_data)
prints:
model MAE MAPE RMSE RSquared
0 Linear Regression 829.102341 1.030237e+00 1120.554271 0.559829
1 Random Forest Regressor 288.334557 2.093754e-01 416.489332 0.939192
2 Gradient Boost Regression 712.963727 5.382449e-01 1012.399202 0.640698
3 Extra Tree Regressor 0.001063 6.306698e-07 0.058041 1.000000