rpurrrreticulate

R Reticulate does work with for loop but not purrr::map2


I want to remove passwords from several Excel files, so I specified a data frame with paths and passwords and then added some python code running with reticulate.

When I pass that function to a for loop, it does work perfectly fine. However, when using map2, I'm getting an error:

Error in !trace_length(trace) : invalid argument type

Here's my code (NOTE: you need to change the path to your desired path and need to make sure you have two respective Excel files with "test" and "test2" as their passwords):

library(tidyverse)
library(reticulate)

pw_dat <- data.frame(path = c("C:/Users/USERNAME/Downloads/file1.xlsx",
                              "C:/Users/USERNAME/Downloads/file2.xlsx"),
                     pw   = c("test", "test2"))

# function with python code
unlock_excel <- function(file, password)
{
  
  output_folder <- "C:/Users/USERNAME/Downloads/test/"
  file <- normalizePath(file)
  
  py_code <- sprintf("
import pathlib
import msoffcrypto

def unlock(filename, passwd, output_folder):
    temp = open(filename, 'rb')
    excel = msoffcrypto.OfficeFile(temp)
    excel.load_key(passwd)
    out_path = pathlib.Path(output_folder)
    
    if not out_path.exists():
        out_path.mkdir(parents=True, exist_ok=True)

    with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:
        excel.decrypt(f)
    
    temp.close()

unlock('%s', '%s', '%s')
", file_path, password, output_folder)
  
  # run python script
  py_run_string(py_code)
}


## This works
for (i in 1:nrow(pw_dat)) {
  file_path <- pw_dat[i, "path"]
  password <- pw_dat[i, "pw"]
  
  unlock_excel(file_path, password)
}


## This doesn't work
map2(.x = pw_dat$path,
     .y = pw_dat$pw,
     .f = ~unlock_excel(file = .x, password = .y))

UPDATE: I think it has sth. to do with the normalizePath code. Without it, map2 works.

UPDATE 2: Here's the full traceback:

Error in !trace_length(trace) : invalid argument type
10.
stop(structure(list(message = " File \"<string>\", line 20\n unlock('C:\\Users\\USERNAME\\Downloads\\file1.xlsx', 'test', 'C:/Users/MartinDegen/Downloads/test/')\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nSyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \\UXXXXXXXX escape\n\033[90mRun \033]8;;rstudio:run:reticulate::py_last_error()\a`reticulate::py_last_error()`\033]8;;\a for details.\033[39m",
call = py_run_string_impl(code, local, convert)), class = c("python.builtin.SyntaxError",
"python.builtin.Exception", "python.builtin.BaseException", "python.builtin.object",
"error", "condition"), py_object = <environment>))
9.
py_run_string_impl(code, local, convert)
8.
py_run_string(py_code)
7.
unlock_excel(file = .x, password = .y)
6.
.f(.x[[i]], .y[[i]], ...)
5.
call_with_cleanup(map2_impl, environment(), .type, .progress,
n, names, i)
4.
withCallingHandlers(expr, error = function(cnd) {
if (i == 0L) {
}
else { ...
3.
with_indexed_errors(i = i, names = names, error_call = .purrr_error_call,
call_with_cleanup(map2_impl, environment(), .type, .progress,
n, names, i))
2.
map2_("list", .x, .y, .f, ..., .progress = .progress)
1.
map2(.x = pw_dat$path, .y = pw_dat$pw, .f = ~unlock_excel(file = .x,
password = .y))

Update 3: result of py_code:

[1] "\nimport pathlib\nimport msoffcrypto\n\ndef unlock(filename, passwd, output_folder):\n    temp = open(filename, 'rb')\n    excel = msoffcrypto.OfficeFile(temp)\n    excel.load_key(passwd)\n    out_path = pathlib.Path(output_folder)\n    \n    if not out_path.exists():\n        out_path.mkdir(parents=True, exist_ok=True)\n\n    with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:\n        excel.decrypt(f)\n    \n    temp.close()\n\n# Funktion aufrufen\nunlock('C:/Users/USERNAME/Downloads/file1.xlsx', 'test', 'C:/Users/USERNAME/Downloads/test/')\n"

Solution

  • In your sprintf() call you are using file_path (global object, updated in a loop) instead of file (function arg & local object, never passed to your Python code):

    unlock_excel <- function(file, password){
      
      output_folder <- "C:/Users/USERNAME/Downloads/test/"
      file <- normalizePath(file)
      
      py_code <- sprintf("...
    unlock('%s', '%s', '%s')
    ", file_path, password, output_folder)
      
      # run python script
      py_run_string(py_code)
    }
    

    It could be just a typo, but it would much easier to spot when global objects in functions are avoided. Also, there's no need to re-import modules and re-decalre Python function on every call; nor use R wrapper function. After executing Pyhon code and defining unlock(), you can call it directly with py$unlock() from R.

    As unlock() is called only for side effects and does not have a (non-None) return value, purrr::walk2() would be preferred here over purrr::map2().

    library(purrr)
    library(reticulate)
    py_require("msoffcrypto-tool")
    
    # test files:
    fs::dir_tree("excel")
    #> excel
    #> ├── file1.xlsx
    #> └── file2.xlsx
    pw_dat <- 
      data.frame(path = fs::path("excel") / c("file1.xlsx", "file2.xlsx"),
                 pw   = c("test", "test2"))
    pw_dat
    #>               path    pw
    #> 1 excel/file1.xlsx  test
    #> 2 excel/file2.xlsx test2
    
    py_run_string("
    import pathlib
    import msoffcrypto
    
    def unlock(filename, passwd, output_folder):
        temp = open(filename, 'rb')
        excel = msoffcrypto.OfficeFile(temp)
        excel.load_key(passwd)
        out_path = pathlib.Path(output_folder)
        
        if not out_path.exists():
            out_path.mkdir(parents=True, exist_ok=True)
    
        with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:
            excel.decrypt(f)
        
        temp.close()
    ")
    
    fs::dir_create("excel/out")
    walk2(
      .x = pw_dat$path, 
      .y = pw_dat$pw, 
      .f = \(x, y) py$unlock(fs::path_abs(x), y, fs::path_abs("excel/out"))
    )
    

    Resulting dir tree:

    fs::dir_tree("excel")
    #> excel
    #> ├── file1.xlsx
    #> ├── file2.xlsx
    #> └── out
    #>     ├── file1.xlsx
    #>     └── file2.xlsx