python parallel-processing joblib

A Python function does not work correctly when run on multiple cores, but runs normally on a single core


This function does not work correctly when n_jobs != 1:

from joblib import Parallel, delayed
import numpy as np 
import pandas as pd

def foo(n_jobs):
    """Fill two result arrays from joblib tasks and return them as a DataFrame.

    Each task writes one element of ``y2xT`` (``x * 2``) and ``x2yT``
    (``y + 1``) in place through the closure instead of returning a value.

    NOTE: this only produces the expected output with ``n_jobs=1``, where the
    tasks run in the calling process. With ``n_jobs != 1`` the arrays come
    back unfilled -- joblib's default backend runs tasks in separate worker
    processes, so the in-place writes presumably land in each worker's own
    copy of the plain ndarrays rather than in the arrays defined here.

    Args:
        n_jobs: Number of joblib workers passed to ``Parallel``.

    Returns:
        pandas.DataFrame with columns ``y2xT`` and ``x2yT``.
    """

    result = {}

    # Two sine waves sampled at 1000 points on [0, 1).
    x = np.sin(4 * np.pi * np.arange(0, 1, 0.001))
    y = np.sin(8 * np.pi * np.arange(0, 1, 0.001) + np.pi/2)

    # Output buffers: ordinary in-memory arrays owned by this process.
    x2yT = np.zeros(x.shape[0])
    y2xT = np.zeros(x.shape[0])
    
    def parallelize(ite):
        
        xi = x[ite] * 2
        yi = y[ite] + 1
        # Side-effect-only task: writes into the closure arrays, returns None.
        y2xT[ite] = xi
        x2yT[ite] = yi

    # `r` collects the tasks' return values (all None here) and is never used.
    r = Parallel(n_jobs=n_jobs)(delayed(parallelize)(i) for i in np.arange(x.shape[0]))

    result[f'y2xT'] = y2xT
    result[f'x2yT'] = x2yT
    return pd.DataFrame(result)

The following code snippet produces a plot using a function foo with a single core:

# Single-core baseline: all tasks run in the calling process.
r0 = foo(n_jobs=1)
r0.plot()

The resulting plot is shown below:

A.

The following code snippet produces a plot using a function foo with multiple cores:

# Multi-core run: n_jobs=-1 asks joblib to use every available core.
r0 = foo(n_jobs=-1)
r0.plot()

The resulting plot is shown below:

B

How can I ensure that the function utilizes multiple cores correctly?


Solution

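  • Following Frank's comment, I converted y2xT and x2yT from numpy.ndarray to numpy.memmap, and it works! With n_jobs != 1 joblib runs the tasks in separate worker processes, so writes to a plain ndarray only change each worker's private copy; a memmap is backed by a file that all processes share, so the writes become visible to the parent.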

    def foo(n_jobs):
        """Fill two result arrays from joblib tasks and return them as a DataFrame.

        The output buffers are ``numpy.memmap`` arrays, so writes made by joblib
        worker processes land in files shared with the parent process instead of
        in per-process copies of a plain ``ndarray``.

        Args:
            n_jobs: Number of joblib workers passed to ``Parallel`` (``1`` runs
                in the calling process, ``-1`` uses all cores).

        Returns:
            pandas.DataFrame with columns ``y2xT`` (``x * 2``) and ``x2yT``
            (``y + 1``), one row per sample, held in ordinary in-memory arrays.
        """
        import os
        import shutil
        import tempfile

        # Two sine waves sampled at 1000 points on [0, 1).
        x = np.sin(4 * np.pi * np.arange(0, 1, 0.001))
        y = np.sin(8 * np.pi * np.arange(0, 1, 0.001) + np.pi/2)

        # Keep the backing files in a private scratch directory rather than under
        # fixed names in the current working directory: concurrent calls cannot
        # clobber each other and nothing is left behind afterwards.
        scratch_dir = tempfile.mkdtemp(prefix='foo_memmap_')
        try:
            # dtype=x.dtype keeps the precision of the inputs (float64), so the
            # output matches the n_jobs=1 ndarray version; float32 would
            # silently round every value.
            x2yT = np.memmap(os.path.join(scratch_dir, 'x2yT.dat'),
                             dtype=x.dtype, shape=x.shape, mode='w+')
            y2xT = np.memmap(os.path.join(scratch_dir, 'y2xT.dat'),
                             dtype=x.dtype, shape=x.shape, mode='w+')

            def parallelize(ite):
                # Side-effect-only task: writes one element of each shared buffer.
                y2xT[ite] = x[ite] * 2
                x2yT[ite] = y[ite] + 1

            Parallel(n_jobs=n_jobs)(
                delayed(parallelize)(i) for i in range(x.shape[0])
            )

            # Copy into plain ndarrays so the returned DataFrame does not depend
            # on the scratch files that are about to be removed.
            result = {'y2xT': np.array(y2xT), 'x2yT': np.array(x2yT)}

            # Drop this process's mappings before deleting the files; an open
            # mapping can block file removal on some platforms.
            del x2yT, y2xT
        finally:
            # Best-effort cleanup: never mask an error from the computation.
            shutil.rmtree(scratch_dir, ignore_errors=True)

        return pd.DataFrame(result)
    
    # Multi-core run: n_jobs=-1 asks joblib to use every available core.
    r0 = foo(n_jobs=-1)
    r0.plot()