I am attempting to use multiprocessing to generate complex, unpicklable objects, as in the following code snippet:
from multiprocessing import Manager
from pathos.multiprocessing import ProcessingPool
class Facility:
    """Facility whose blocks are generated in parallel worker processes.

    The managed list is created in ``__init__`` on purpose: this is the
    arrangement the surrounding post reports as raising
    ``AuthenticationError: digest sent was rejected``.
    """

    def __init__(self):
        # Proxy to a list held by a Manager process; workers append to it.
        self.blocks = Manager().list()

    def __process_blocks(self, block):
        """Generate one block from its row dict and append it to ``self.blocks``.

        ``block`` is a dict built by ``design``: the row's own fields plus the
        ``"designer"`` and ``"terrain"`` entries added there.
        """
        builder = block["designer"]
        with_terrain = block["terrain"]
        type_name = self.__block_type_to_string(block["type"])
        generated = builder.generate_block(
            block_id=block["id"],
            block_type=type_name,
            anchor=Point(
                float(block["anchor_x"]),
                float(block["anchor_y"]),
                float(block["anchor_z"]),
            ),
            pcu_anchor=Point(float(block["pcu_x"]), float(block["pcu_y"]), 0),
            corridor_width=block["corridor"],
            jb_height=block["jb_connect_height"],
            min_boxes=block["min_boxes"],
            apply_terrain=with_terrain,
        )
        self.blocks.append(generated)

    def design(self, apply_terrain=False):
        """Build every stored block in a process pool.

        Each row from the store is converted to a dict and tagged with the
        shared builder and the ``apply_terrain`` flag before being mapped over
        ``__process_blocks``.
        """
        builder = FacilityBuilder(
            string_locator=self._string_locator,
            string_router=self._string_router,
            box_router=self._box_router,
            sorter=self._sorter,
            tracker_configurator=self._tracker_configurator,
            config=self._config,
        )

        jobs = []
        for _, row in self._store.get_blocks().iterrows():
            job = row.to_dict()
            job["designer"] = builder
            job["terrain"] = apply_terrain
            jobs.append(job)

        with ProcessingPool() as pool:
            pool.map(self.__process_blocks, jobs)
(Struggling to reproduce this with simpler code so I am showing actual code)
I need to update a shared variable, so I initialise a class-level variable using a multiprocessing.Manager as follows:
self.blocks = Manager().list()
This leaves me with the following error (only partial stacktrace):
File "C:\Users\Paul.Nel\Documents\repos\autoPV\.autopv\lib\site-packages\dill\_dill.py", line 481, in load
obj = StockUnpickler.load(self)
File "C:\Users\Paul.Nel\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 933, in RebuildProxy
return func(token, serializer, incref=incref, **kwds)
File "C:\Users\Paul.Nel\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 783, in __init__
self._incref()
File "C:\Users\Paul.Nel\AppData\Local\Programs\Python\Python39\lib\multiprocessing\managers.py", line 837, in _incref
conn = self._Client(self._token.address, authkey=self._authkey)
File "C:\Users\Paul.Nel\AppData\Local\Programs\Python\Python39\lib\multiprocessing\connection.py", line 513, in Client
answer_challenge(c, authkey)
File "C:\Users\Paul.Nel\AppData\Local\Programs\Python\Python39\lib\multiprocessing\connection.py", line 764, in answer_challenge
raise AuthenticationError('digest sent was rejected')
multiprocessing.context.AuthenticationError: digest sent was rejected
As a last resort I tried to use Python's standard ThreadPool implementation to try to circumvent the pickle issue, but this has not gone well either. I have read about many similar issues but have not found the solution to this particular problem. Is the problem with dill or with the way pathos interfaces with multiprocessing.Manager?
EDIT: So I managed to replicate this with sample code as follows:
import os
import math
from multiprocessing import Manager
from pathos.multiprocessing import ProcessingPool
class MyComplex:
    """Toy payload object: keeps the square of ``x`` and can recover its root."""

    def __init__(self, x):
        squared = x * x
        self._z = squared

    def me(self):
        """Return the square root of the stored square."""
        root = math.sqrt(self._z)
        return root
class Starter:
    """Minimal reproduction: the managed list is stored on the instance.

    The post reports that the error appears once
    ``self.my_list = manager.list()`` is added, so that line is kept as is.
    """

    def __init__(self):
        list_manager = Manager()
        self.my_list = list_manager.list()

    def _f(self, value):
        """Print the root of ``value`` with the worker PID, then record ``value.me``."""
        root = value.me()
        pid = os.getpid()
        print(f"{root} on {pid}")
        # The bound method itself (not its result) is appended, as in the post.
        self.my_list.append(value.me)

    def start(self):
        """Map ``_f`` over 100 ``MyComplex`` objects in a process pool."""
        payloads = list(map(MyComplex, range(100)))
        with ProcessingPool() as pool:
            pool.map(self._f, payloads)
# Entry-point guard: keeps the demo from re-running when this module is
# imported rather than executed directly.
if __name__ == "__main__":
    starter = Starter()
    starter.start()
The error occurs when I add self.my_list = manager.list().
So I have resolved this issue. It would still be great if someone like mmckerns, or someone else with more knowledge of multiprocessing than me, could comment on why this is a solution.
The issue seems to have been that the Manager().list() was declared in __init__. The following code works without any issues:
import os
import math
from multiprocessing import Manager
from pathos.multiprocessing import ProcessingPool
class MyComplex:
    """Small example object that remembers ``x`` squared."""

    def __init__(self, x):
        # Stored squared so that ``me`` always has a non-negative input.
        product = x * x
        self._z = product

    def me(self):
        """Return ``sqrt`` of the stored square."""
        stored = self._z
        return math.sqrt(stored)
class Starter:
    """Working variant: the managed list is local to ``start``, not on ``self``."""

    def _f(self, value):
        """Report which process handled ``value`` and return its root.

        Args:
            value: Object exposing a ``me()`` method.

        Returns:
            The result of ``value.me()``.
        """
        # Call once and reuse the result for both the log line and the return.
        result = value.me()
        print(f"{result} on {os.getpid()}")
        return result

    def start(self):
        """Map ``_f`` over 100 ``MyComplex`` objects and print the collected roots."""
        names = [MyComplex(x) for x in range(100)]
        # The context manager shuts the manager's server process down on exit
        # instead of leaving that to garbage collection.
        with Manager() as manager:
            my_list = manager.list()
            with ProcessingPool() as pool:
                # extend, not append: append would store the whole result list
                # as one nested element.
                my_list.extend(pool.map(self._f, names))
            # Print while the manager is still alive; the proxy needs it.
            print(my_list)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    starter = Starter()
    starter.start()
Here I declare the list local to the ProcessingPool operation. I can assign the result to a class-level variable afterwards if I choose.