r time-series r-caret

RMSE wrongly matches MAE using caret package method "timeslice" when horizon = 1 in R


Why does the linear_model RMSE match the MAE when I run the code below? The linear_model RMSE also does not match the RMSE calculated by hand.

library(caret)

# Fix the RNG seed so the simulated data, and therefore every metric printed
# below, is the same on each run.
set.seed(123)

# Simulate 750 observations of y = 10 * x + e with standard-normal noise e.
e <- rnorm(750)
x <- rnorm(750, mean = 5)
y <- 10 * x + e

model_data <- data.frame(y, x)

# Time-slice resampling: each resample trains on a fixed window of 36 rows and
# predicts the next `horizon` rows (here a single row). The held-out
# predictions are saved so they can be inspected afterwards.
model_tuning <- trainControl(
  method          = "timeslice",
  initialWindow   = 36,
  horizon         = 1,
  fixedWindow     = TRUE,
  savePredictions = TRUE
)

linear_model <- train(
  form       = y ~ x,
  data       = model_data,
  method     = "lm",
  trControl  = model_tuning
)

# Prints the resampling summary; this is where RMSE and MAE come out equal.
linear_model

# Metrics computed by hand from all saved held-out predictions, for comparison
# with the values printed above.
error <- linear_model$pred$pred - linear_model$pred$obs

sqrt(mean(error^2))

mean(abs(error))

Solution

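  • Each resample produces its own RMSE, Rsquared, and MAE. With horizon = 1 each resample contains only one predicted value, so its RMSE and MAE are the same (for a single error e, sqrt(e^2) = |e|) and its Rsquared cannot be calculated. The RMSE and MAE that train reports are the means of these per-resample values, i.e. the means of linear_model$resample$RMSE and linear_model$resample$MAE, which is why they match each other but not the RMSE calculated by hand from all the predictions. To get the predicted versus observed statistics over all predictions, use defaultSummary or postResample. See the code below.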

    library(caret)

    # Fix the RNG seed so the simulated data, and therefore every metric
    # printed below, is the same on each run.
    set.seed(123)

    # Simulate 750 observations of y = 10 * x + e with standard-normal noise e.
    e <- rnorm(750)
    x <- rnorm(750, mean = 5)
    y <- 10 * x + e

    model_data <- data.frame(y, x)

    # Time-slice resampling: each resample trains on a fixed window of 36 rows
    # and predicts the next `horizon` rows (here a single row). The held-out
    # predictions are saved so they can be pooled afterwards.
    model_tuning <- trainControl(
      method          = "timeslice",
      initialWindow   = 36,
      horizon         = 1,
      fixedWindow     = TRUE,
      savePredictions = TRUE
    )

    linear_model <- train(
      form       = y ~ x,
      data       = model_data,
      method     = "lm",
      trControl  = model_tuning
    )

    # Printing the model reports the metrics averaged over the resamples
    linear_model

    # These are the results from each of the resamples
    head(linear_model$resample)

    # Averaging the per-resample values by hand, for comparison with the
    # RMSE and MAE printed by the model
    mean(linear_model$resample$RMSE)

    mean(linear_model$resample$MAE)

    # These are the predicted versus observed statistics, computed over all
    # saved held-out predictions at once
    predictions <- data.frame(
      pred = linear_model$pred$pred,
      obs  = linear_model$pred$obs
    )

    defaultSummary(predictions)

    postResample(
      pred = linear_model$pred$pred,
      obs  = linear_model$pred$obs
    )

    # The same statistics calculated by hand from the prediction errors
    error <- linear_model$pred$pred - linear_model$pred$obs

    sqrt(mean(error^2))

    mean(abs(error))