rmlr

Imputation using MICE in mlr


I am trying to write my own imputation method in mlr using makeImputeMethod to perform multiple imputation by chained equations with the mice package in R. My imputeMice() method runs to completion but I get the following error after it has completed:

Error in `[.data.frame`(data, ind) : undefined columns selected

I am not sure why, nor where it is coming from. This is the code I have written:

library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#> 
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#> 
#>     cbind, rbind

# Load the PBC data and collapse status code 2 into code 1, so that `status`
# can serve as the event indicator of the survival task below.
data(pbc)
task_id <- "PBC"
pbc[pbc$status == 2, "status"] <- 1

# Survival task: `time` and `status` are the two target columns.
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Outer resampling description: stratified 2-fold cross-validation.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)

# Custom mlr imputation method that tries to impute with mice (multiple
# imputation by chained equations). Returns whatever makeImputeMethod() builds
# from the two callbacks below. This is the version that ends with
# "undefined columns selected".
imputeMice = function() {
  makeImputeMethod(
    # learn: no fitting is done here; the data is handed back unchanged as
    # `values`.
    learn = function(data, target, col) {
      return(list(values = data))
    },
    # impute: runs mice on the data it receives. `values` (from learn) and `col`
    # are not used in this body.
    impute = function(data, target, col, values) {
      data = as.data.frame(data)
      # Names of all factor columns, passed to quickpred's `exclude`.
      excl = names(data)[ sapply(data, is.factor) ]
      predmat = mice::quickpred(data, minpuc=0, mincor=0, exclude=excl)
      # Fixed seed, no progress printing.
      imp_data = mice::mice(data, pred=predmat, seed = 23109, printFlag=FALSE)
      x = mice::complete(imp_data)
      print("Imputation completed")
      # NOTE(review): this returns the whole completed data frame rather than
      # the single column `col`. mlr presumably expects one column back from
      # impute, which would explain the "undefined columns selected" error --
      # confirm against the makeImputeMethod documentation.
      return(x)
    }
  )
}

# Learner pipeline: a Cox model ("surv.coxph") behind a feature filter
# (method "univariate.model.score", fw.perc = 0.1, cached), wrapped so that
# numeric, integer and factor columns are all imputed with imputeMice().
lrn <- makeImputeWrapper(
  makeFilterWrapper(
    makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
    fw.method = "univariate.model.score",
    fw.perc = 0.1,
    cache = TRUE
  ),
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample the pipeline on the PBC task with the outer CV, scoring with
# `cindex`, keeping the fitted models and extracting the filtered features.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures:             cindex
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> Error in `[.data.frame`(data, ind): undefined columns selected

Created on 2020-06-16 by the reprex package (v0.3.0)

Clearly the function imputeMice() is called once for each column of the data.frame pbc. But mice only needs to be called once, because a single call imputes every column. Is it possible to do this in mlr?


Solution

  • The error was mine: I should have called mice in the learn function rather than in the impute function (I find the names of these functions confusing). My new code is below and it works, but it still calls mice once for every column. I really only need to call it once. Is this possible?

    library(survival)
    #> Warning: package 'survival' was built under R version 3.6.3
    library(mlr)
    #> Warning: package 'mlr' was built under R version 3.6.3
    #> Loading required package: ParamHelpers
    #> Warning: package 'ParamHelpers' was built under R version 3.6.3
    #> 'mlr' is in maintenance mode since July 2019. Future development
    #> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
    library(lattice)
    #> Warning: package 'lattice' was built under R version 3.6.3
    library(mice)
    #> Warning: package 'mice' was built under R version 3.6.3
    #> 
    #> Attaching package: 'mice'
    #> The following objects are masked from 'package:base':
    #> 
    #>     cbind, rbind
    
    # Load the PBC data and collapse status code 2 into code 1, so that `status`
    # can serve as the event indicator of the survival task below.
    data(pbc)
    task_id <- "PBC"
    pbc[pbc$status == 2, "status"] <- 1

    # Survival task: `time` and `status` are the two target columns.
    pbc.task <- makeSurvTask(
      id = task_id,
      data = pbc,
      target = c("time", "status")
    )

    # Outer resampling description: stratified 2-fold cross-validation.
    outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
    
    # Custom mlr imputation method backed by mice (multiple imputation by
    # chained equations). Returns the object built by makeImputeMethod().
    #
    # learn(data, target, col):  completes `data` with mice and stores the
    #                            completed column `col` as `values`.
    # impute(data, target, col, values): overwrites column `col` with the
    #                            stored `values` and returns that column.
    #
    # NOTE(review): mice is re-run in full for every column this method is
    # applied to. The stored `values` have one entry per row of the data seen
    # by learn, so impute only lines up with data that has the same number of
    # rows -- confirm this is acceptable when predicting on new data.
    imputeMice <- function() {
      makeImputeMethod(
        learn = function(data, target, col) {
          data <- as.data.frame(data)
          # Factor columns are handed to quickpred's `exclude` argument.
          is_fac <- vapply(data, is.factor, logical(1L))
          predmat <- mice::quickpred(
            data,
            minpuc = 0,
            mincor = 0,
            exclude = names(data)[is_fac]
          )
          # Argument name spelled out in full (was the partial match `pred=`);
          # fixed seed keeps the imputation reproducible.
          imp_data <- mice::mice(
            data,
            predictorMatrix = predmat,
            seed = 23109,
            printFlag = FALSE
          )
          completed <- mice::complete(imp_data)
          list(values = completed[[col]])
        },
        impute = function(data, target, col, values) {
          # Assigning through the data frame keeps its length check on `values`.
          data[[col]] <- values
          data[[col]]
        }
      )
    }
    
    # Learner pipeline: a Cox model ("surv.coxph") behind a feature filter
    # (method "univariate.model.score", fw.perc = 0.1, cached), wrapped so that
    # numeric, integer and factor columns are all imputed with imputeMice().
    lrn <- makeImputeWrapper(
      makeFilterWrapper(
        makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
        fw.method = "univariate.model.score",
        fw.perc = 0.1,
        cache = TRUE
      ),
      classes = list(
        numeric = imputeMice(),
        integer = imputeMice(),
        factor = imputeMice()
      )
    )

    # Resample the pipeline on the PBC task with the outer CV, scoring with
    # `cindex`, keeping the fitted models and extracting the filtered features.
    res <- resample(
      learner = lrn,
      task = pbc.task,
      resampling = outer,
      models = TRUE,
      measures = list(cindex),
      show.info = TRUE,
      extract = getFilteredFeatures
    )
    #> Resampling: cross-validation
    #> Measures:             cindex
    #> [Resample] iter 1:    0.7069869
    #> [Resample] iter 2:    0.7138798
    #> 
    #> Aggregated Result: cindex.test.mean=0.7104333
    #> 
    

    Created on 2020-06-19 by the reprex package (v0.3.0)