Tags: python, matplotlib, scatter-plot, density-plot, datashader

1D scatter plot colored by density


I was wondering whether it was possible to use mpl-scatter-density or datashader to create a 1D scatter plot colored by density as it is shown here for the 2D case.

It can be done with gaussian_kde, but it is quite slow when the number of points that I want to represent is above 10k.

Also, is there a way to use the mpl-scatter-density approach without creating the axes with fig.add_subplot(1, 1, 1, projection='scatter_density'), i.e. by just creating them with plt.subplots?

I have tried to do so by using the ScatterDensityArtist from mpl_scatter_density without success.

Here is some sample code of the 1D scatter plot colored by density using gaussian_kde.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Generate fake data: 2 panels, each with 3 groups of 1000 samples.
# Group k is drawn at x == k and its y values are normal noise scaled by k.
data_x = np.broadcast_to(np.array([[1], [2], [3]]), (2, 3, 1000))
data_y = data_x * np.random.normal(size=(2, 3, 1000))

# Create subplots
nrows = 1
ncols = 2
size = 5
fig, ax_array = plt.subplots(
    nrows,
    ncols,
    figsize=(16 / 9 * ncols * size, nrows * size),
    squeeze=False,
)

# squeeze=False keeps ax_array 2-D; iterating .flat walks it in row-major
# order, so the running index equals ncols*i + j for any grid shape (the
# hand-written nrows*i + j was only correct when nrows == ncols or nrows == 1).
for index, axes in enumerate(ax_array.flat):
    x = data_x[index]
    y = data_y[index]

    for x_values, y_values in zip(x, y):
        # Evaluate the KDE at every sample; this is the step that becomes
        # slow once a group holds many points.
        z_values = gaussian_kde(y_values)(y_values)

        # Sort by density so the points are passed to scatter from the
        # sparsest to the densest.
        idx = z_values.argsort()
        x_values, y_values, z_values = x_values[idx], y_values[idx], z_values[idx]

        axes.scatter(
            x_values, y_values,
            c=z_values, s=10,
            # Pass the colormap by name: plt.cm.get_cmap is no longer
            # available in recent Matplotlib releases.
            cmap='Reds',
        )
plt.show()

The previous code produces this image.


Solution

  • Rather than evaluating the KDE at every one of the y_values, which is slow when there are many points, you can evaluate it at far fewer points (say 100) and then interpolate between those to get the required z_values. I find this is on the order of 50 times quicker (when y_values has 10000 samples and the interpolator evaluates the KDE at 100 points). E.g.,

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    
    # get interp1d
    from scipy.interpolate import interp1d
    
    
    # create interpolator
    def interpolated(y_values, npoints=100):
        """Approximate the Gaussian KDE of ``y_values`` at each of its samples.

        The KDE is fitted on all of ``y_values`` but evaluated only on an
        evenly spaced grid of ``npoints`` values spanning the data range; the
        density at each sample is then obtained by linear interpolation on
        that grid.

        Parameters
        ----------
        y_values : array_like
            One-dimensional collection of samples.
        npoints : int, optional
            Number of grid points at which the KDE is evaluated (default 100).

        Returns
        -------
        numpy.ndarray
            Density estimates, one per entry of ``y_values`` and in the same
            order.

        Raises
        ------
        ValueError
            If ``npoints`` is smaller than 2, since a grid needs both ends of
            the data range to interpolate between.
        """
        if npoints < 2:
            raise ValueError("npoints must be at least 2")

        # Accept plain sequences as well as arrays.
        y_values = np.asarray(y_values, dtype=float)
        kde = gaussian_kde(y_values)

        # With no more samples than grid points the grid saves nothing, so
        # evaluate the KDE directly and skip the interpolation error.
        if y_values.size <= npoints:
            return kde(y_values)

        # The grid runs from the smallest to the largest sample, so every
        # value of y_values lies inside the interpolation range.
        grid = np.linspace(y_values.min(), y_values.max(), npoints)

        # np.interp does the same linear interpolation as the legacy interp1d.
        return np.interp(y_values, grid, kde(grid))
    
    
    # Generate fake data: 2 panels, each with 3 groups of 10000 samples.
    # Group k is drawn at x == k and its y values are normal noise scaled by k.
    data_x = np.broadcast_to(np.array([[1], [2], [3]]), (2, 3, 10000))
    data_y = data_x * np.random.normal(size=(2, 3, 10000))

    # Create subplots
    nrows = 1
    ncols = 2
    size = 5
    fig, ax_array = plt.subplots(
        nrows,
        ncols,
        figsize=(16 / 9 * ncols * size, nrows * size),
        squeeze=False,
    )

    # Refer to the colormap by name: plt.cm.get_cmap is no longer available
    # in recent Matplotlib releases.
    cmap = 'Reds'

    # squeeze=False keeps ax_array 2-D; iterating .flat walks it in row-major
    # order, so the running index equals ncols*i + j for any grid shape (the
    # hand-written nrows*i + j was only correct when nrows == ncols or
    # nrows == 1).
    for index, axes in enumerate(ax_array.flat):
        x = data_x[index]
        y = data_y[index]

        for x_values, y_values in zip(x, y):
            # use interpolator to get z_values
            z_values = interpolated(y_values)

            # Sort by density so the points are passed to scatter from the
            # sparsest to the densest.
            idx = z_values.argsort()
            x_values, y_values, z_values = x_values[idx], y_values[idx], z_values[idx]

            axes.scatter(
                x_values, y_values,
                c=z_values, s=10,
                cmap=cmap,
            )
    plt.show()