I have a custom class that I want to serialize for multiprocessing, but neither `pickle` nor `dill` works correctly: both lose important data when the object is round-tripped. How can I fix this?
My class:
import pickle
import dill
import pandas as pd
import numpy as np
class C(pd.Series):
    """A float64 ``pd.Series`` that carries one extra attribute, ``value``.

    The extra attribute is stored on the instance as ``_value`` and is
    exposed through the ``value`` property.
    """

    def __init__(self, value: str, *args, **kwargs):
        """Build the series with a forced float64 dtype, then store ``value``.

        All positional and keyword arguments other than ``value`` are
        forwarded to ``pd.Series``.
        """
        super().__init__(*args, dtype=np.float64, **kwargs)
        self.value = value

    @property
    def value(self):
        """Return the stored value, initialising it to ``None`` when unset."""
        if not hasattr(self, "_value"):
            # Nothing has been assigned yet: create the slot lazily.
            self._value = None
        return self._value

    @value.setter
    def value(self, new_value: str):
        self._value = new_value
# Round-trip the same object through both serializers and compare the
# extra attribute before and after.
c1 = C(data=[0], index=[0], value=1)
for serializer in (pickle, dill):
    c2 = serializer.loads(serializer.dumps(c1))
    print(c1.value)  # prints 1
    print(c2.value)  # prints None
As @gog commented, you need to provide your own implementations of `__getstate__` and `__setstate__`.
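The pandas implementation of `__getstate__` returns a dictionary, but that dictionary does not contain any user-set attributes. Because unpickling restores the object from this state without calling `__init__`, anything missing from it is lost. So we must explicitly add the additional key/value pair: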
import pickle
import dill
import pandas as pd
import numpy as np
class C(pd.Series):
    """A float64 ``pd.Series`` with an extra ``value`` attribute that survives pickling.

    The attribute is stored on the instance as ``_value``. It is added to
    the pickled state in ``__getstate__`` and restored in ``__setstate__``,
    so it is preserved by ``pickle`` round trips.
    """

    def __init__(self, value: object, *args, **kwargs):
        """Build the series with a forced float64 dtype, then store ``value``.

        All positional and keyword arguments other than ``value`` are
        forwarded to ``pd.Series``.
        """
        super().__init__(*args, **kwargs, dtype=np.float64)
        self.value = value

    @property
    def value(self):
        """Return the stored value, initialising it to ``None`` when unset."""
        try:
            return self._value
        except AttributeError:
            self._value = None
            return self._value

    @value.setter
    def value(self, value: object):
        self._value = value

    def __getstate__(self):
        """Return the parent's state dict extended with the ``_value`` entry."""
        state = super().__getstate__()
        # Go through the property so an instance whose ``_value`` was never
        # assigned is pickled with None instead of raising AttributeError.
        state["_value"] = self.value
        return state

    def __setstate__(self, state):
        """Restore the parent's state, then the ``_value`` entry."""
        super().__setstate__(state)
        # Tolerate state that lacks the key (e.g. written before ``_value``
        # was added to the state) or that is not a dict at all.
        self._value = state.get("_value") if isinstance(state, dict) else None
# Round-trip the same object through both serializers and compare the
# extra attribute before and after.
c1 = C(data=[0], index=[0], value=1)
for serializer in (pickle, dill):
    c2 = serializer.loads(serializer.dumps(c1))
    print(c1.value)  # prints 1
    print(c2.value)  # prints 1
Prints:
1
1
1
1