The goal is:
I can't seem to get past that last step.
# R
library(reticulate)
library(dplyr)
library(arrow)
# Python
import pyarrow
import pandas
arrow_dat <- arrow::as_arrow_table(tibble(col = c(1,2,3)))
py_taxa_arrow <- r_to_py(arrow_dat)
r.py_taxa_arrow
pyarrow.Table
col: double
----
col: [[1,2,3]]
py_taxa_arrow_to_pd = r.py_taxa_arrow.to_pandas()
py_taxa_arrow_edited = pandas.DataFrame(py_taxa_arrow_to_pd) + 1
py_taxa_arrow_edited
col
0 2.0
1 3.0
2 4.0
py_taxa_arrow_edited_converted = pyarrow.Table.from_pandas(py_taxa_arrow_edited)
py_taxa_arrow_edited_converted
pyarrow.Table
col: double
----
col: [[2,3,4]]
py_taxa_arrow_edited <- r_to_py(py$py_taxa_arrow_edited_converted)
py_taxa_arrow_edited
pyarrow.Table
col: double
----
col: [[2,3,4]]
tibble(py_taxa_arrow_edited)
Error in `tibble()`:
! All columns in a tibble must be vectors.
✖ Column `py_taxa_arrow_edited` is a
`pyarrow.lib.Table/pyarrow.lib._Tabular/pyarrow.lib._PandasConvertible/pyarrow.lib._Weakrefable/python.builtin.object`
object.
Backtrace:
1. tibble::tibble(py_taxa_arrow_edited)
Error in tibble(py_taxa_arrow_edited) :
✖ Column `py_taxa_arrow_edited` is a
`pyarrow.lib.Table/pyarrow.lib._Tabular/pyarrow.lib._PandasConvertible/pyarrow.lib._Weakrefable/python.builtin.object`
object.
9.
stop(fallback)
8.
signal_abort(cnd, .file)
7.
abort(x, class, ..., call = call, parent = parent, use_cli_format = TRUE)
6.
tibble_abort(call = call, problems("All columns in a tibble must be vectors:",
x = paste0("Column ", name_or_pos(names, positions), " is ",
classes)), names = names)
5.
abort_column_scalar_type(names_x[is_xd], pos[is_xd], classes,
call)
4.
check_valid_cols(set_names(list(x), name), call = call)
3.
check_valid_col(res, col_names[[j]], j, call)
2.
tibble_quos(xs, .rows, .name_repair)
1.
tibble(py_taxa_arrow_edited)
Edit: Final Solution
# R
arrow_dat <- arrow::as_arrow_table(tibble(col = c(1,2,3)))
py_taxa_arrow <- r_to_py(arrow_dat)
# Python
py_taxa_arrow_to_pd = r.py_taxa_arrow.to_pandas()
py_taxa_arrow_edited = py_taxa_arrow_to_pd + 1
py_taxa_arrow_edited_converted = pyarrow.Table.from_pandas(py_taxa_arrow_edited)
# R
as_tibble(py$py_taxa_arrow_edited_converted)
col
<dbl>
2
3
4
There are two issues with your reprex:
1. You call `r_to_py` (a no-op, as the object is already in Python) instead of `py_to_r`.
2. You call `tibble`; using `as_tibble` instead of `tibble` correctly converts it to a tibble.

I have recreated the fixed reprex so it can be run in one go:
library(reticulate)
library(dplyr)
library(arrow)
# Bind the Python modules through reticulate. `pd` is not referenced again
# below; the binding is kept so the script defines the same names as before.
pa <- import("pyarrow")
pd <- import("pandas")

# Build a one-column Arrow table on the R side.
arrow_dat <- arrow::as_arrow_table(tibble(col = c(1, 2, 3)))

# Convert to python
py_taxa_arrow <- r_to_py(arrow_dat)
py_taxa_arrow

# Do stuff: go through pandas and add 1 to every value.
py_taxa_arrow_to_pd <- py_taxa_arrow$to_pandas()
py_taxa_arrow_to_pd
py_taxa_arrow_edited <- py_taxa_arrow_to_pd + 1
py_taxa_arrow_edited

# Convert back
# py_to_r converts to data.frame
# pa$Table$from_pandas converts to Arrow table, so you want to use
# that if you have larger (than memory) data.
py_taxa_arrow_edited_converted <- pa$Table$from_pandas(py_taxa_arrow_edited)
py_taxa_arrow_edited_converted

# Use as_tibble(), not tibble(): tibble() tries to store the whole table as a
# single column and fails with "All columns in a tibble must be vectors".
as_tibble(py_taxa_arrow_edited_converted)