I want to remove passwords from several Excel files, so I specified a data frame with paths and passwords and then added some python code running with reticulate.
When I pass that function to a for loop, it does work perfectly fine. However, when using map2, I'm getting an error:
Error in !trace_length(trace) : invalid argument type
Here's my code (NOTE: you need to change the path to your desired path and need to make sure you have two respective Excel files with "test" and "test2" as their passwords):
library(tidyverse)
library(reticulate)
pw_dat <- data.frame(path = c("C:/Users/USERNAME/Downloads/file1.xlsx",
"C:/Users/USERNAME/Downloads/file2.xlsx"),
pw = c("test", "test2"))
# function with python code
unlock_excel <- function(file, password)
{
output_folder <- "C:/Users/USERNAME/Downloads/test/"
file <- normalizePath(file)
py_code <- sprintf("
import pathlib
import msoffcrypto
def unlock(filename, passwd, output_folder):
temp = open(filename, 'rb')
excel = msoffcrypto.OfficeFile(temp)
excel.load_key(passwd)
out_path = pathlib.Path(output_folder)
if not out_path.exists():
out_path.mkdir(parents=True, exist_ok=True)
with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:
excel.decrypt(f)
temp.close()
unlock('%s', '%s', '%s')
", file_path, password, output_folder)
# run python script
py_run_string(py_code)
}
## This works
for (i in 1:nrow(pw_dat)) {
file_path <- pw_dat[i, "path"]
password <- pw_dat[i, "pw"]
unlock_excel(file_path, password)
}
## This doesn't work
map2(.x = pw_dat$path,
.y = pw_dat$pw,
.f = ~unlock_excel(file = .x, password = .y))
UPDATE: I think it has sth. to do with the normalizePath
code. Without it, map2 works.
UPDATE 2: Here's the full traceback:
Error in !trace_length(trace) : invalid argument type
10.
stop(structure(list(message = " File \"<string>\", line 20\n unlock('C:\\Users\\USERNAME\\Downloads\\file1.xlsx', 'test', 'C:/Users/MartinDegen/Downloads/test/')\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nSyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \\UXXXXXXXX escape\n\033[90mRun \033]8;;rstudio:run:reticulate::py_last_error()\a`reticulate::py_last_error()`\033]8;;\a for details.\033[39m",
call = py_run_string_impl(code, local, convert)), class = c("python.builtin.SyntaxError",
"python.builtin.Exception", "python.builtin.BaseException", "python.builtin.object",
"error", "condition"), py_object = <environment>))
9.
py_run_string_impl(code, local, convert)
8.
py_run_string(py_code)
7.
unlock_excel(file = .x, password = .y)
6.
.f(.x[[i]], .y[[i]], ...)
5.
call_with_cleanup(map2_impl, environment(), .type, .progress,
n, names, i)
4.
withCallingHandlers(expr, error = function(cnd) {
if (i == 0L) {
}
else { ...
3.
with_indexed_errors(i = i, names = names, error_call = .purrr_error_call,
call_with_cleanup(map2_impl, environment(), .type, .progress,
n, names, i))
2.
map2_("list", .x, .y, .f, ..., .progress = .progress)
1.
map2(.x = pw_dat$path, .y = pw_dat$pw, .f = ~unlock_excel(file = .x,
password = .y))
Update 3: result of py_code
:
[1] "\nimport pathlib\nimport msoffcrypto\n\ndef unlock(filename, passwd, output_folder):\n temp = open(filename, 'rb')\n excel = msoffcrypto.OfficeFile(temp)\n excel.load_key(passwd)\n out_path = pathlib.Path(output_folder)\n \n if not out_path.exists():\n out_path.mkdir(parents=True, exist_ok=True)\n\n with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:\n excel.decrypt(f)\n \n temp.close()\n\n# Funktion aufrufen\nunlock('C:/Users/USERNAME/Downloads/file1.xlsx', 'test', 'C:/Users/USERNAME/Downloads/test/')\n"
In your sprintf()
call you are using file_path
(global object, updated in a loop) instead of file
(function arg & local object, never passed to your Python code):
unlock_excel <- function(file, password){
output_folder <- "C:/Users/USERNAME/Downloads/test/"
file <- normalizePath(file)
py_code <- sprintf("...
unlock('%s', '%s', '%s')
", file_path, password, output_folder)
# run python script
py_run_string(py_code)
}
It could be just a typo, but it would much easier to spot when global objects in functions are avoided. Also, there's no need to re-import modules and re-decalre Python function on every call; nor use R wrapper function. After executing Pyhon code and defining unlock()
, you can call it directly with py$unlock()
from R.
As unlock()
is called only for side effects and does not have a (non-None) return value, purrr::walk2()
would be preferred here over purrr::map2()
.
library(purrr)
library(reticulate)
py_require("msoffcrypto-tool")
# test files:
fs::dir_tree("excel")
#> excel
#> ├── file1.xlsx
#> └── file2.xlsx
pw_dat <-
data.frame(path = fs::path("excel") / c("file1.xlsx", "file2.xlsx"),
pw = c("test", "test2"))
pw_dat
#> path pw
#> 1 excel/file1.xlsx test
#> 2 excel/file2.xlsx test2
py_run_string("
import pathlib
import msoffcrypto
def unlock(filename, passwd, output_folder):
temp = open(filename, 'rb')
excel = msoffcrypto.OfficeFile(temp)
excel.load_key(passwd)
out_path = pathlib.Path(output_folder)
if not out_path.exists():
out_path.mkdir(parents=True, exist_ok=True)
with open(str(out_path / pathlib.Path(filename).name), 'wb') as f:
excel.decrypt(f)
temp.close()
")
fs::dir_create("excel/out")
walk2(
.x = pw_dat$path,
.y = pw_dat$pw,
.f = \(x, y) py$unlock(fs::path_abs(x), y, fs::path_abs("excel/out"))
)
Resulting dir tree:
fs::dir_tree("excel")
#> excel
#> ├── file1.xlsx
#> ├── file2.xlsx
#> └── out
#> ├── file1.xlsx
#> └── file2.xlsx