python multiprocessing pathos

Multiprocessing: returning objects created by worker processes


I am using the multiprocessing library to speed up the creation of class instances. A minimal example of my code:

from matplotlib.backends.backend_qt5agg import FigureCanvas
class Custom_Class(FigureCanvas)
    .
    .
    .

def generate_class_func(list_of_dfs, arg1, arg2):
    """Build one Custom_Class instance per entry of list_of_dfs.

    Every entry is passed to Custom_Class together with the shared arg1 and
    arg2 values; the resulting instances are returned as a list, in the same
    order as the input.
    """
    return [Custom_Class(frame, arg1, arg2) for frame in list_of_dfs]

def main():
    """Run generate_class_func across a process pool.

    Each element of list_of_dfs is paired with arg1 and arg2 and handed to
    generate_class_func in a worker process via Pool.starmap; the per-task
    results are collected into list_of_classes_list.

    NOTE(review): list_of_dfs, arg1 and arg2 are not defined in this function;
    they are presumably module-level names that this excerpt does not show.
    """
    import multiprocessing as mp
    # repeat() was previously used without being imported, which raised a
    # NameError while the starmap arguments were being built.
    from itertools import repeat

    with mp.Pool() as p:
        list_of_classes_list = p.starmap(
            generate_class_func,
            zip(list_of_dfs, repeat(arg1), repeat(arg2)),
        )
        # Shut the pool down gracefully and wait for the workers before
        # leaving the with-block.
        p.close()
        p.join()
    

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

However, I get the error multiprocess.pool.MaybeEncodingError: Error sending result: ... Reason: 'TypeError("cannot pickle 'Custom_Class' object")'. I also tried the pathos.ProcessingPool module, but the error is the same. How can I return custom objects from worker processes with multiprocessing?


Solution

  • I'm the author of dill and multiprocess. One easy trick to make things more serializable is to use multiprocess instead of multiprocessing. The former is a fork of the latter that uses dill instead of pickle, so you immediately get the ability to serialize more objects, including most custom classes.

    >>> from matplotlib.pylab import FigureCanvasBase
    >>> class Custom_Class(FigureCanvasBase):
    ...   def __init__(self, df, arg1, arg2):
    ...     super().__init__()
    ...     self.df = df
    ...     self.arg1 = arg1
    ...     self.arg2 = arg2
    ... 
    >>> def generate_class_func(list_of_dfs, arg1, arg2):
    ...     list_of_custom_classes = list()
    ...     for df in list_of_dfs:
    ...         custom_class = Custom_Class(df, arg1, arg2)
    ...         list_of_custom_classes.append(custom_class)
    ...     return list_of_custom_classes
    ... 
    >>> 
    >>> import multiprocess as mp
    >>> with mp.Pool() as p:
    ...   from itertools import repeat
    ...   list_of_classes_list = p.starmap(generate_class_func, zip([[0,1],[2,3],[4,5],[6,7]], repeat('arg1'), repeat('arg2')))
    ...   p.close()
    ...   p.join()
    ... 
    >>> list_of_classes_list
    [[<__main__.Custom_Class object at 0x11b35a390>, <__main__.Custom_Class object at 0x11b0fcb10>], [<__main__.Custom_Class object at 0x11b373110>, <__main__.Custom_Class object at 0x11ae1fa10>], [<__main__.Custom_Class object at 0x11b37b990>, <__main__.Custom_Class object at 0x11b3965d0>], [<__main__.Custom_Class object at 0x11b114f90>, <__main__.Custom_Class object at 0x11adfdb90>]]