If anyone has a solution for how I can get this to work, please let me know. I would prefer not to downgrade Python to 2.x.
I have tried to remap some of the columns to different dtypes. I think Python 3.x may be storing strings as Unicode, and perhaps pandas and/or numexpr does not support this in the versions I am on.
data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns=['Name', 'Age'])
df['Name'] = df['Name'].astype('string')
df.dtypes
df.query("'tom'")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-37-a5f548d874ef> in <module>()
7 df['Name'] = df['Name'].astype('string')
8 df.dtypes
----> 9 df.query("'tom'")
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in query(self, expr, inplace, **kwargs)
3343 kwargs["level"] = kwargs.pop("level", 0) + 1
3344 kwargs["target"] = None
-> 3345 res = self.eval(expr, **kwargs)
3346
3347 try:
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in eval(self, expr, inplace, **kwargs)
3473 kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
3474
-> 3475 return _eval(expr, inplace=inplace, **kwargs)
3476
3477 def select_dtypes(self, include=None, exclude=None) -> "DataFrame":
/usr/local/lib/python3.6/dist-packages/pandas/core/computation/eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
344 eng = _engines[engine]
345 eng_inst = eng(parsed_expr)
--> 346 ret = eng_inst.evaluate()
347
348 if parsed_expr.assigner is None:
/usr/local/lib/python3.6/dist-packages/pandas/core/computation/engines.py in evaluate(self)
71
72 # make sure no names in resolvers and locals/globals clash
---> 73 res = self._evaluate()
74 return reconstruct_object(
75 self.result_type, res, self.aligned_axes, self.expr.terms.return_type
/usr/local/lib/python3.6/dist-packages/pandas/core/computation/engines.py in _evaluate(self)
112 scope = env.full_scope
113 _check_ne_builtin_clash(self.expr)
--> 114 return ne.evaluate(s, local_dict=scope)
115
116
~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
813 # Create a signature
814 signature = [(name, getType(arg)) for (name, arg) in
--> 815 zip(names, arguments)]
816
817 # Look up numexpr if possible.
~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in <listcomp>(.0)
812
813 # Create a signature
--> 814 signature = [(name, getType(arg)) for (name, arg) in
815 zip(names, arguments)]
816
~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in getType(a)
689 return bytes
690 if kind == 'U':
--> 691 raise ValueError('NumExpr 2 does not support Unicode as a dtype.')
692 raise ValueError("unknown type %s" % a.dtype.name)
693
ValueError: NumExpr 2 does not support Unicode as a dtype.
The only reason you get a confusing error message that mentions dtypes is that you're using the NumExpr engine.
Here, using the python engine, you get a KeyError, which is clearer:
>>> df.query("'tom'", engine='python')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/frame.py", line 3348, in query
result = self.loc[res]
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 879, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 1110, in _getitem_axis
return self._get_label(key, axis=axis)
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 1059, in _get_label
return self.obj.xs(label, axis=axis)
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/generic.py", line 3493, in xs
loc = self.index.get_loc(key)
File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexes/range.py", line 358, in get_loc
raise KeyError(key)
KeyError: 'tom'
As wjandrea pointed out, this isn't a valid query statement to begin with: 'tom' on its own is just a string literal, not a condition on a column. Did you mean this?
>>> df.query("Name == 'tom'")
Name Age
0 tom 10