_metadata original properties are not passed to pyjanitor manipulation results
Take the following MWE:
from typing import Any, Callable

import janitor  # noqa: F401
import pandas as pd
import pandas_flavor as pf
# See: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#define-original-properties
class MyDataFrame(pd.DataFrame):
    """DataFrame subclass that carries one extra attribute, ``myvar``.

    ``myvar`` is listed in ``_metadata`` so that pandas copies it onto results
    it builds through ``_constructor`` / ``__finalize__``.
    """

    # normal properties
    _metadata = ["myvar"]

    @property
    def _constructor(self):
        # Tell pandas to build results of its own operations as MyDataFrame
        # rather than falling back to a plain DataFrame.
        return MyDataFrame
@pf.register_dataframe_method
def regvar(self):
    """Wrap the frame as a ``MyDataFrame`` and set its ``myvar`` to 2.

    Returns:
        A ``MyDataFrame`` constructed from ``self`` with ``myvar == 2``.
    """
    obj = MyDataFrame(self)
    obj.myvar = 2
    return obj
@pf.register_dataframe_method
def printvar(self):
    """Print the frame's ``myvar`` attribute and return the frame itself.

    Returning ``self`` keeps the method usable inside a method chain.

    Raises:
        AttributeError: If ``self`` has no ``myvar`` attribute, for example
            when it is a plain ``pandas.DataFrame``.
    """
    print(self.myvar)
    return self
# Toy data set: one row per (Year, Taxon) observation with its abundance.
df = pd.DataFrame(
    data=dict(
        Year=[1999, 2000, 2004, 1999, 2004],
        Taxon=["Saccharina"] * 3 + ["Agarum"] * 2,
        Abundance=[4, 5, 2, 1, 8],
    )
)
Now:
# Chain through pandas' own query(): myvar is still available to printvar().
df2 = df.regvar().query("Taxon=='Saccharina'").printvar()
This correctly prints 2.
However:
# Every year from 1999 through 2004, named to match the "Year" column.
index = pd.Index(range(1999,2005),name='Year')
# Chain through janitor's complete(): the object reaching printvar() is a plain
# DataFrame, so the myvar lookup fails (see the traceback below).
df2 = df.regvar().complete(index, "Taxon", sort=True).printvar()
This raises an exception:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_4412\627945022.py in ?()
39
40 df2 = df.regvar().query("Taxon=='Saccharina'").printvar()
41
42 index = pd.Index(range(1999,2005),name='Year')
---> 43 df2 = df.regvar().complete(index, "Taxon", sort=True).printvar()
HOME\venvs\base\Lib\site-packages\pandas_flavor\register.py in ?(self, *args, **kwargs)
160 object: The result of calling of the method.
161 """
162 global method_call_ctx_factory
163 if method_call_ctx_factory is None:
--> 164 return method(self._obj, *args, **kwargs)
165
166 return handle_pandas_extension_call(
167 method, method_signature, self._obj, args, kwargs
~\AppData\Local\Temp\ipykernel_4412\627945022.py in ?(self)
21 @pf.register_dataframe_method
22 def printvar(self):
---> 23 print(self.myvar)
24 return self
HOME\venvs\base\Lib\site-packages\pandas\core\generic.py in ?(self, name)
6295 and name not in self._accessors
6296 and self._info_axis._can_hold_identifiers_and_holds_name(name)
6297 ):
6298 return self[name]
-> 6299 return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'myvar'
Versions: pandas 2.2.3, janitor 0.31.0
Janitor functions usually create new data structures either from scratch (using pandas.DataFrame as a blueprint) or by combining data with methods like pandas.merge. In the first case, we get a fresh DataFrame with the default _metadata. In the second, the returned object may not retain the original type, nor is its metadata necessarily preserved (see related discussions in the pandas-dev GitHub repository).
To ensure that metadata is maintained, we can define a custom piping function. For example:
@pf.register_dataframe_method
def pipe_meta(data: pd.DataFrame, func: Callable[..., Any], *args, **kwargs):
    """Apply ``func`` to ``data`` and carry ``data``'s class and metadata over.

    Args:
        data: Frame whose class and ``_metadata`` attributes should be kept.
        func: Callable invoked as ``func(data, *args, **kwargs)``.
        *args: Extra positional arguments forwarded to ``func``.
        **kwargs: Extra keyword arguments forwarded to ``func``.

    Returns:
        The result of ``func``. When that result is a ``pandas.DataFrame`` it
        is re-wrapped in the class of ``data`` and every name listed in
        ``data._metadata`` is copied onto it (``None`` if ``data`` lacks the
        attribute). Any other result is returned untouched.
    """
    obj = func(data, *args, **kwargs)
    if not isinstance(obj, pd.DataFrame):
        # Nothing to re-wrap: metadata only makes sense on a frame.
        return obj

    # Rebuild as the caller's subclass, then restore each declared attribute,
    # since func may have returned a plain DataFrame without them.
    obj = data.__class__(obj)
    for meta in data._metadata:
        setattr(obj, meta, getattr(data, meta, None))
    return obj
Then, we can complete the initial task like this:
# Route janitor.complete through pipe_meta so the result is re-wrapped in the
# original class and myvar is copied back before printvar() runs.
df2 = df.regvar().pipe_meta(janitor.complete, index, "Taxon", sort=True).printvar()