I am trying to get SHAP values for my models (which I built using caret). I have an RF model, and the data is:
# Sample data: 10 rows, 13 columns (4 factors, 7 integer, 2 double).
# Factor levels are given explicitly so their order matches the original dump.
data <- data.frame(
  Main_Street = factor(
    c("70", "270", "70", "64", "270", "70", "270", "64", "70", "70"),
    levels = c("64", "70", "270")
  ),
  Blocked_Lanes = c(3L, 4L, 2L, 1L, 1L, 2L, 6L, 3L, 3L, 3L),
  Total_Vehicle_Count = c(1L, 2L, 2L, 2L, 1L, 4L, 3L, 2L, 2L, 1L),
  Tractor_Trailer_Count = rep(0L, 10L),
  Weather_Winter_Storm = factor(
    c("No", "Yes", rep("No", 8L)),
    levels = c("No", "Yes")
  ),
  Weather_Rain = factor(
    c("Yes", rep("No", 7L), "Yes", "No"),
    levels = c("No", "Yes")
  ),
  Injuries_Count = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L),
  Accident_Overturned_Car = factor(
    c(rep("No", 9L), "Yes"),
    levels = c("No", "Yes")
  ),
  Fatalities_Count = rep(0L, 10L),
  Speed = c(65L, 46L, 10L, 42L, 40L, 21L, 15L, 57L, 59L, 59L),
  Total_Volume = c(48.7, 22.5, 47.3, 102, 138, 75.3, 60.5, 83.3, 18, 26.7),
  Occupancy = c(3.5, 1.7, 40.8, 23.8, 14.1, 31, 27.1, 4.9, 2.6, 2.5),
  Lanes_Cleared_Duration = c(53L, 35L, 32L, 4L, 11L, 35L, 42L, 12L, 36L, 69L)
)
The RF model is:
# Resampling scheme: 10-fold cross-validation, repeated 10 times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10
)

# Candidate mtry values 2, 3, ..., 7. floor() makes explicit the truncation
# that `2:sqrt(61)` performed silently (sqrt(61) is about 7.81).
# NOTE(review): 61 is presumably the predictor count of the full data set;
# confirm, since mtry must not exceed the number of predictors.
randomforestGrid <- expand.grid(mtry = seq(2, floor(sqrt(61))))

# Seed once, immediately before the only step that draws random numbers.
set.seed(2356)

# NOTE(review): `training` is not defined in this snippet; presumably it is
# the training split of `data` -- confirm before running.
#
# caret's "rf" method tunes mtry only. The number of trees is handed through
# to randomForest(), whose argument is `ntree` and takes a single value. The
# previous `n.trees = c(1:50) * 100` and `verbose = FALSE` are gbm arguments
# that randomForest() does not define, so the forest was grown with the
# default of 500 trees. That value is now stated explicitly; to compare
# several forest sizes, call train() once per `ntree` value.
rf_model <- train(
  Lanes_Cleared_Duration ~ .,
  data = training,
  method = "rf",
  trControl = fitControl,
  metric = "RMSE",
  tuneGrid = randomforestGrid,
  ntree = 500
)
There are many sources on how to make SHAP plots, but none of them work for my data, and I keep getting errors. For example, this post asked a similar question, but its answers did not solve the problem. Here is the kind of plot that I would like to produce:
Is it also possible to export a data frame containing SHAP values for each variable?
Here is an example slightly modified from our {kernelshap} README:
library(caret)
library(kernelshap)
library(shapviz)
# Random forest for Sepal.Length on all other iris columns; mtry is picked
# from 2, 3, 4 using the out-of-bag error.
fit <- train(
  Sepal.Length ~ .,
  data = iris,
  method = "rf",
  trControl = trainControl(method = "oob"),
  tuneGrid = data.frame(mtry = c(2L, 3L, 4L))
)

# Kernel SHAP for every row, explaining all columns except the response
# (column 1). bg_X is the background data; if the data has more than about
# 500 rows, pass a subsample instead.
s <- kernelshap(fit, X = iris[-1], bg_X = iris)

# Wrap the result for plotting, then draw the beeswarm importance plot and
# one dependence plot per explained feature.
sv <- shapviz(s)
sv_importance(sv, kind = "bee")
sv_dependence(sv, v = names(iris)[-1])

# First rows of the SHAP matrix (one column per explained feature).
head(s[["S"]])
Sepal.Width Petal.Length Petal.Width Species
[1,] 0.18710551 -0.7689923 -0.11966640 -0.02138098
[2,] -0.04975942 -0.8421627 -0.16929579 -0.02247297
[3,] -0.05134404 -0.9807516 -0.21007903 -0.02603232
[4,] -0.01474815 -0.8314441 -0.18571834 -0.02234505
[5,] 0.16345002 -0.8066228 -0.13735372 -0.02104766
[6,] 0.27269103 -0.6231013 -0.06449333 -0.01560054