r, machine-learning, random-forest, mlr

How to construct a learner for random forest regression in R


The example scripts I have found for random forest with the mlr package are all for classification problems. I have a random forest regression model for water contamination. I could discretise the continuous target variable and classify it, but I would rather run a regression. If anyone knows a script for random forest regression using mlr, please share. I am stuck at the make-a-learner stage: neither of these lines works:

regr.lrn = makeLearner("regr.gbm", n.trees = 100)

regr.lrn = makeLearner("regr.randomForest", predict.type = "prob", fix.factors.prediction = TRUE)

Solution

  • Does this example help?

    library(caret)
    library(mlr)
    library(MASS)
    
    # keep the last six columns of Boston; medv (median home value) is the target
    Boston1 <- Boston[,9:14]
    # 75/25 train/test split
    datasplit <- createDataPartition(y = Boston1$medv, times = 1,
                                     p = 0.75, list = FALSE)
    
    Boston1_train <- Boston1[datasplit,]
    Boston1_test <- Boston1[-datasplit,]
    
    # define the regression task (target = medv) on the training data
    rtask <- makeRegrTask(id = "bh", data = Boston1_train, target = "medv")
    # fit several different regression learners on the same task
    rmod <- train(makeLearner("regr.lm"), rtask)
    rmod1 <- train(makeLearner("regr.fnn"), rtask)
    rmod2 <- train(makeLearner("regr.randomForest"), rtask)
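    # hyperparameters can be passed straight to makeLearner if needed,
    # e.g. makeLearner("regr.randomForest", ntree = 500, mtry = 3)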
    
    # visualise each learner's fitted curve against a single feature (lstat)
    plotLearnerPrediction("regr.lm", features = "lstat", task = rtask)
    

    example_1.png

    plotLearnerPrediction("regr.fnn", features = "lstat", task = rtask)
    

    example_2.png

    plotLearnerPrediction("regr.randomForest", features = "lstat", task = rtask)
    

    example_3.png

    # what other learners could be used?
    # lrns = listLearners()
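    # listLearners("regr") narrows the list to regression learners, and
    # listLearners(rtask) to learners applicable to this particular task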
    rmod3 <- train(makeLearner("regr.cforest"), rtask)
    rmod4 <- train(makeLearner("regr.ksvm"), rtask)
    rmod5 <- train(makeLearner("regr.glmboost"), rtask)
    plotLearnerPrediction("regr.cforest", features = "lstat", task = rtask)
    

    example_4.png

    plotLearnerPrediction("regr.ksvm", features = "lstat", task = rtask)
    

    example_5.png

    plotLearnerPrediction("regr.glmboost", features = "lstat", task = rtask)
    

    example_6.png

    # predict each fitted model on the held-out test set;
    # each resulting Prediction keeps truth and response in its $data data.frame
    pred_mod = predict(rmod, newdata = Boston1_test)
    pred_mod1 = predict(rmod1, newdata = Boston1_test)
    pred_mod2 = predict(rmod2, newdata = Boston1_test)
    pred_mod3 = predict(rmod3, newdata = Boston1_test)
    pred_mod4 = predict(rmod4, newdata = Boston1_test)
    pred_mod5 = predict(rmod5, newdata = Boston1_test)
    
    # observed values alongside each learner's test-set predictions
    predictions <- data.frame(truth = pred_mod$data$truth,
                              regr.lm = pred_mod$data$response,
                              regr.fnn = pred_mod1$data$response,
                              regr.randomForest = pred_mod2$data$response,
                              regr.cforest = pred_mod3$data$response, 
                              regr.ksvm = pred_mod4$data$response,
                              regr.glmboost = pred_mod5$data$response)
    
    # reshape to long format and plot each learner's predictions against the truth
    library(tidyverse)
    predictions %>%
      pivot_longer(-truth, values_to = "predicted_value") %>%
      ggplot(aes(x = truth, y = predicted_value, color = name)) +
      geom_smooth() +
      geom_abline(slope = 1, intercept = 0)
    

    example_7.png
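
    If a numeric comparison is wanted as well, mlr's performance() should do it; it scores a Prediction object against measures such as rmse and mae (a sketch using the Prediction objects above):

    # test-set error for every learner, one column each
    sapply(list(lm = pred_mod, fnn = pred_mod1, rf = pred_mod2,
                cforest = pred_mod3, ksvm = pred_mod4, glmboost = pred_mod5),
           function(p) performance(p, measures = list(rmse, mae)))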