python r reticulate

Running tslearn via reticulate leads to TypeError


I am trying to run TimeSeriesKMeans from tslearn via reticulate, but I apparently fail to convert my R data.frame to a NumPy array properly. The full code and the error are below. Could you please have a look at what I am doing wrong? An explanation would be appreciated, because my knowledge of Python internals is quite limited.

library(reticulate)
library(magrittr)

# Install the Python packages this example needs into the reticulate
# environment (the output below shows the 'r-reticulate' virtualenv being used).
reticulate::py_install(c("numpy", "pandas", "tslearn"))
#> Using virtual environment '/home/rstudio/.virtualenvs/r-reticulate' ...
#> + /home/rstudio/.virtualenvs/r-reticulate/bin/python -m pip install --upgrade --no-user numpy pandas tslearn

# Simulate rhythmic time series
# Sampling points: every second value from 1 to 21.
time <- seq(1, 21, 2)

# One sine curve (period 24) per amplitude in 0, 0.2, ..., 1. The curves are
# flattened and refilled row-wise, so each row of the data frame is one series
# and each column ("time1", "time3", ...) is one sampling point.
time_series <- 
  lapply(seq(0, 1, .2), function(x) x*sin(2*pi*time/24)) %>%
  unlist %>%
  matrix(., byrow=TRUE, ncol=length(time)) %>%
  data.frame %>% setNames(paste0("time", time))

# Convert time_series to numpy array
# The data frame is passed through numpy's array() and the result is wrapped
# with r_to_py() so that a Python object is handed to tslearn.
np <- reticulate::import("numpy")
ts_np <- r_to_py(np$array(time_series))

# Run https://tslearn.readthedocs.io/en/latest/gen_modules/clustering/tslearn.clustering.TimeSeriesKMeans.html via reticulate
# NOTE: the bare literal 2 is an R double, not an integer (an integer literal
# would be written 2L); see the Solution at the end of this post.
ts <- reticulate::import("tslearn.clustering")
tsk <- ts$TimeSeriesKMeans(n_clusters=2, metric="dtw")

# Problematic line
# fit() raises the TypeError recorded below; try() lets the script carry on so
# that the environment details can still be printed.
try(tsk$fit(ts_np))
#> Error in py_call_impl(callable, call_args$unnamed, call_args$named) : 
#>   TypeError: 'float' object cannot be interpreted as an integer
#> Run `reticulate::py_last_error()` for details.

# List the Python packages (with versions) available in the reticulate
# environment, recorded here so the problem can be reproduced.
reticulate::py_list_packages()
#>                package version                requirement
#> 1            contourpy   1.1.0           contourpy==1.1.0
#> 2               cycler  0.11.0             cycler==0.11.0
#> 3         dtaidistance  2.3.10       dtaidistance==2.3.10
#> 4            fonttools  4.42.0          fonttools==4.42.0
#> 5                 h5py   3.9.0                h5py==3.9.0
#> 6   importlib-metadata   6.8.0  importlib-metadata==6.8.0
#> 7  importlib-resources   6.0.1 importlib-resources==6.0.1
#> 8               joblib   1.3.2              joblib==1.3.2
#> 9           kiwisolver   1.4.4          kiwisolver==1.4.4
#> 10            llvmlite  0.41.0           llvmlite==0.41.0
#> 11          matplotlib   3.7.2          matplotlib==3.7.2
#> 12             natsort   8.4.0             natsort==8.4.0
#> 13            networkx     3.1              networkx==3.1
#> 14               numba  0.58.0              numba==0.58.0
#> 15               numpy  1.24.4              numpy==1.24.4
#> 16           packaging    23.1            packaging==23.1
#> 17              pandas   2.0.3              pandas==2.0.3
#> 18               patsy   0.5.3               patsy==0.5.3
#> 19              Pillow  10.0.0             Pillow==10.0.0
#> 20       pkg_resources   0.0.0       pkg_resources==0.0.0
#> 21              polars 0.18.15            polars==0.18.15
#> 22         progressbar     2.5           progressbar==2.5
#> 23             pyarrow  12.0.1            pyarrow==12.0.1
#> 24           pyparsing   3.0.9           pyparsing==3.0.9
#> 25     python-dateutil   2.8.2     python-dateutil==2.8.2
#> 26                pytz  2023.3               pytz==2023.3
#> 27        scikit-fuzzy   0.4.2        scikit-fuzzy==0.4.2
#> 28        scikit-learn   1.3.1        scikit-learn==1.3.1
#> 29               scipy  1.10.1              scipy==1.10.1
#> 30             seaborn  0.12.2            seaborn==0.12.2
#> 31                 six  1.16.0                six==1.16.0
#> 32         statsmodels  0.14.0        statsmodels==0.14.0
#> 33       threadpoolctl   3.2.0       threadpoolctl==3.2.0
#> 34             tslearn   0.6.2             tslearn==0.6.2
#> 35              tzdata  2023.3             tzdata==2023.3
#> 36                zipp  3.16.2               zipp==3.16.2
reticulate::py_version()
#> [1] '3.8'
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] magrittr_2.0.3  reticulate_1.31
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.9        rstudioapi_0.13   knitr_1.39        lattice_0.20-45  
#>  [5] R.cache_0.16.0    rlang_1.0.4       fastmap_1.1.0     fansi_1.0.3      
#>  [9] stringr_1.4.0     styler_1.10.2     highr_0.9         tools_4.2.1      
#> [13] grid_4.2.1        xfun_0.32         png_0.1-7         R.oo_1.25.0      
#> [17] utf8_1.2.2        cli_3.3.0         withr_2.5.0       htmltools_0.5.3  
#> [21] yaml_2.3.5        digest_0.6.29     lifecycle_1.0.1   Matrix_1.4-1     
#> [25] purrr_0.3.4       vctrs_0.4.1       R.utils_2.12.0    fs_1.5.2         
#> [29] glue_1.6.2        evaluate_0.16     rmarkdown_2.14    reprex_2.0.1     
#> [33] stringi_1.7.8     pillar_1.8.0      compiler_4.2.1    R.methodsS3_1.8.2
#> [37] jsonlite_1.8.0
Created on 2023-09-22 by the reprex package (v2.0.1)

The py_last_error:

> reticulate::py_last_error()

── Python Exception Message ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Traceback (most recent call last):
  File "/home/rstudio/.virtualenvs/r-reticulate/lib/python3.8/site-packages/tslearn/clustering/kmeans.py", line 821, in fit
    self._fit_one_init(X_, x_squared_norms, rs)
  File "/home/rstudio/.virtualenvs/r-reticulate/lib/python3.8/site-packages/tslearn/clustering/kmeans.py", line 675, in _fit_one_init
    self.cluster_centers_ = _k_init_metric(
  File "/home/rstudio/.virtualenvs/r-reticulate/lib/python3.8/site-packages/tslearn/clustering/kmeans.py", line 98, in _k_init_metric
    centers = numpy.empty((n_clusters, n_timestamps, n_features), dtype=X.dtype)
TypeError: 'float' object cannot be interpreted as an integer

── R Traceback ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
    ▆
 1. ├─base::try(tsk$fit(ts_np))
 2. │ └─base::tryCatch(...)
 3. │   └─base (local) tryCatchList(expr, classes, parentenv, handlers)
 4. │     └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
 5. │       └─base (local) doTryCatch(return(expr), name, parentenv, handler)
 6. └─tsk$fit(ts_np)
 7.   └─reticulate:::py_call_impl(callable, call_args$unnamed, call_args$named)

Solution

  • Thanks to rpolicastro via Slack.

    The answer is that one has to make the cluster number explicitly an integer:

    # Pass the cluster count as an R integer literal (2L) rather than a double.
    tsk <- ts$TimeSeriesKMeans(
      n_clusters = 2L,
      metric = "dtw"
    )

    So use 2L rather than 2: the L suffix makes the value an R integer, whereas a bare 2 is a double (numeric), which Python receives as a float.