python python-3.x pandas dask modin

import modin.pandas causes ERROR: AttributeError: type object 'pyarrow.lib.Message' has no attribute '__reduce_cython__'


Issue

Tried solutions

Error Message

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-1-f6aea802f5ba> in <module>
      2 import openpyxl
      3 
----> 4 import modin.pandas as pd
      5 ##import pandas as pd
      6 

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/pandas/__init__.py in <module>
    170 
    171 from .. import __version__
--> 172 from .dataframe import DataFrame
    173 from .io import (
    174     read_csv,

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/pandas/dataframe.py in <module>
     46 from .series import Series
     47 from .base import BasePandasDataset, _ATTRS_NO_LOOKUP
---> 48 from .groupby import DataFrameGroupBy
     49 from .accessor import CachedAccessor, SparseFrameAccessor
     50 

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/pandas/groupby.py in <module>
     32     wrap_into_list,
     33 )
---> 34 from modin.backends.base.query_compiler import BaseQueryCompiler
     35 from modin.data_management.functions.default_methods.groupby_default import GroupBy
     36 from modin.config import IsExperimental

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/backends/__init__.py in <module>
     17 __all__ = ["BaseQueryCompiler", "PandasQueryCompiler"]
     18 try:
---> 19     from .pyarrow import PyarrowQueryCompiler  # noqa: F401
     20 except ImportError:
     21     pass

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/backends/pyarrow/__init__.py in <module>
     14 """The module represents the query compiler level for the PyArrow backend."""
     15 
---> 16 from .query_compiler import PyarrowQueryCompiler
     17 
     18 __all__ = ["PyarrowQueryCompiler"]

~/anaconda3/envs/tfall/lib/python3.7/site-packages/modin/backends/pyarrow/query_compiler.py in <module>
     26 from pandas.core.computation.ops import UnaryOp, BinOp, Term, MathCall, Constant
     27 
---> 28 import pyarrow as pa
     29 import pyarrow.gandiva as gandiva
     30 

~/anaconda3/envs/tfall/lib/python3.7/site-packages/pyarrow/__init__.py in <module>
     52 
     53 
---> 54 from pyarrow.lib import cpu_count, set_cpu_count
     55 from pyarrow.lib import (null, bool_,
     56                          int8, int16, int32, int64,

~/anaconda3/envs/tfall/lib/python3.7/site-packages/pyarrow/ipc.pxi in init pyarrow.lib()

AttributeError: type object 'pyarrow.lib.Message' has no attribute '__reduce_cython__'


Solution

  • LINK

    What has not worked

    The following gives me an error:

    1. Code

    conda create --name py37-install-4719 python=3.7
    conda activate py37-install-4719
    conda install modin modin-all modin-core modin-dask modin-omnisci modin-ray
    

    1. Output

    It runs for hours on an AWS EC2 g4dn.4xlarge instance with no other load (I monitored it with htop):

    Collecting package metadata (current_repodata.json): done
    Solving environment: failed with initial frozen solve. Retrying with flexible solve.
    Solving environment: failed with repodata from current_repodata.json, will retry with next repodata source.
    Collecting package metadata (repodata.json): done
    Solving environment: |
    

    What has worked

    Previously, running pip install modin[all] might have given me an error because of:

    Current fully working solution

    conda create --name py37-install-4719-pip python=3.7
    conda activate py37-install-4719-pip
    pip install modin[all]
    

    Example code usage

    import modin.pandas as pd
    from distributed import Client
    client = Client()
    df = pd.read_csv('my_single_csv_name.csv')