python matplotlib seaborn

How to add extra graphical information to a ridge plot


My data consists of several nested categories. For each category I am able to generate a stacked density plot, such as the one illustrated here.

I have several such density plots, each with data in the x domain from 0 to 100. For each stacked density plot I would like a single ridge plot. The end result would be a plot of ridge plots where each row is a single stacked density plot. Is this possible?

Because each row of a ridge plot partly obscures the previous one, I think the area under the curves of the stacked density plots may be misinterpreted by the observer, as some section of a curve may be hidden by the next ridge. Hence I would like to drop the idea of having a stacked density plot in each ridge. Instead, I would like to plot each variable as a ridge, but this time include the mean and standard deviation lines and shade the area under the curve between both standard deviation lines.

As requested (by JohanC), below is the code I would like to seek assistance on. Somehow I am unable to get rid of the "Density" label on the y-axis.

# seaborn ridge plots with penguins dataset
import logging;
import os;

import matplotlib.pyplot as plt;
import numpy as np;
import pandas;
import pandas as pd;

#!pip install seaborn;
import seaborn as sns;

# Log line layout: left-justified level name, timestamp, then the emitting
# function name and line number, followed by the message itself.
LOG_FORMAT = "%(levelname) -5s time:%(asctime)s [%(funcName) -5s %(lineno) -5d]: %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
LOGGER = logging.getLogger(__name__)
# Alias under the name the rest of the script passes around.
logger_obj: logging.Logger = LOGGER


# The "penguins" example dataset, fetched through seaborn.
my_df = sns.load_dataset("penguins")

# White theme with an opaque white axes background (RGBA alpha of 1).
sns.set_theme(rc={"axes.facecolor": (1, 1, 1, 1)}, style="white")

import errno;
def mkdir_p(path):
    """Create directory ``path`` and any missing parents (like ``mkdir -p``).

    An already-existing directory is left untouched and is not an error.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, for example when
            ``path`` exists but is not a directory (``FileExistsError``).
    """
    # exist_ok=True already tolerates a directory that is present (including
    # one created concurrently), so no exists()/isdir() pre-check or EEXIST
    # handling is needed; genuine failures still propagate to the caller.
    os.makedirs(path, exist_ok=True)



def _balanced_species_sample(
    logger_obj: logging.Logger,
    my_df: pandas.DataFrame,
    sample_size: int,
) -> pandas.DataFrame:
    """Return a frame with the same number of randomly drawn rows per species.

    The number of rows drawn per species is ``sample_size``, lowered to the
    size of the smallest species group when any group has fewer rows.

    Raises:
        ValueError: If ``my_df`` has no non-missing ``species`` value.
    """
    # Missing species labels are skipped: comparing against NaN matches no
    # rows, which would otherwise force the per-species sample size to 0.
    species_list = list(my_df["species"].dropna().unique())
    if not species_list:
        raise ValueError("my_df contains no species to plot")

    per_species = {
        species: my_df[my_df["species"] == species] for species in species_list
    }

    # First pass: report the full groups and find the common sample size.
    rows_per_species = sample_size
    for species, subset in per_species.items():
        logger_obj.info(
            "species is :'%s', count is:%s, flipper_length_mm_sum is:%s",
            species, len(subset), subset["flipper_length_mm"].sum(),
        )
        rows_per_species = min(rows_per_species, len(subset))

    # Second pass: draw the samples and report what was actually drawn.
    samples = []
    for species, subset in per_species.items():
        sample = subset.sample(rows_per_species)
        logger_obj.info(
            "species is :'%s', count is:%s, flipper_length_mm_sum is:%s",
            species, len(sample), sample["flipper_length_mm"].sum(),
        )
        samples.append(sample)
    return pd.concat(samples, ignore_index=True)


def generate_plot(
    logger_obj: logging.Logger
    ,my_df: pandas.DataFrame
    ,sample_size: int
    ,axs2
):
    """Draw a ridge plot of ``flipper_length_mm`` per species and save it as a PNG.

    Each species gets one row holding a filled density curve with a white
    outline. Rows overlap vertically, and every species contributes the same
    number of randomly sampled rows.

    Args:
        logger_obj: Logger that receives per-species counts and the output path.
        my_df: Data with at least ``species`` and ``flipper_length_mm`` columns.
        sample_size: Maximum number of rows to sample per species; lowered to
            the size of the smallest species group if needed.
        axs2: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``my_df`` has no non-missing ``species`` value.
    """
    plot_df = _balanced_species_sample(logger_obj, my_df, sample_size)

    # Fully transparent axes backgrounds (alpha 0) so that an overlapping row
    # does not paint over the curve of the row behind it.
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), "axes.linewidth": 2})
    palette = sns.color_palette("Set2", 12)
    g = sns.FacetGrid(data=plot_df, palette=palette, row="species", hue="species", aspect=9, height=1.2)

    # Filled density first, then a white outline on top to separate the ridges.
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", fill=True, alpha=1)
    g.map_dataframe(sns.kdeplot, x="flipper_length_mm", color="white")

    def label_f(x, color, label):
        # Write the species name at the left edge of the current row.
        ax = plt.gca()
        ax.text(0, .2, label, color="black", fontsize=13, ha="left", va="center", transform=ax.transAxes)

    g.map(label_f, "species")
    # Negative spacing makes consecutive rows overlap.
    g.fig.subplots_adjust(hspace=-.5)
    g.set_titles(col_template="", row_template="")
    # ylabel="" removes the "Density" label that kdeplot puts on every row.
    g.set(yticks=[], ylabel="", xlabel="flipper_length_mm")
    g.despine(left=True)

    image_png_fn = "images/penguins.ridge_plot/sample_day_feature.flipper_length_mm.all_species.png"
    logger_obj.info("image_png_fn is :'%s'", image_png_fn)
    # Make sure the output directory exists before saving.
    mkdir_p(os.path.dirname(os.path.abspath(image_png_fn)))
    plt.savefig(image_png_fn)
    

# Requested rows per species; generate_plot lowers this to the size of the
# smallest species group when a group has fewer rows than requested.
sample_size: int = 30000
generate_plot(logger_obj, my_df, sample_size, None)  # last argument (axs2) is not used

enter image description here


Solution

  • Here is what a ridge plot for the penguins dataset could look like, with the mean and a region for the standard deviation added per subplot. A lot of tweaking might be needed for your specific situation. (To remove the y-labels, you can use g.set(..., ylabel='')).

    from matplotlib import pyplot as plt
    import seaborn as sns
    import numpy as np
    
    # Ridge plot of flipper length per species, with a dotted line at the
    # mean and a darker band covering one standard deviation either side.
    penguins = sns.load_dataset('penguins')
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), 'axes.linewidth': 2})
    palette = sns.color_palette("Set2", 12)

    g = sns.FacetGrid(data=penguins, palette=palette, row="species", hue="species", aspect=9, height=1.2)
    for color, (species, ax) in zip(palette, g.axes_dict.items()):
        # Flipper lengths of the species shown in this row.
        is_current_species = penguins['species'] == species
        flipper_lengths = penguins.loc[is_current_species, 'flipper_length_mm'].to_numpy()

        # Draw the density outline, then read the curve back from the axes so
        # the fills and the mean marker follow exactly the same line.
        sns.kdeplot(x=flipper_lengths, color='white', fill=False, ax=ax)
        center = np.nanmean(flipper_lengths)
        spread = np.nanstd(flipper_lengths)
        density_line = ax.lines[0]
        curve_x = density_line.get_xdata()
        curve_y = density_line.get_ydata()

        # Dotted vertical line from the baseline up to the curve at the mean.
        curve_at_center = np.interp(center, curve_x, curve_y)
        ax.vlines(center, 0, curve_at_center, color='black', ls=':')

        # Whole area under the curve in the species colour.
        ax.fill_between(curve_x, 0, curve_y, facecolor=color, alpha=1)

        # Slightly darker overlay where x lies within one std of the mean.
        within_one_std = (curve_x >= center - spread) & (curve_x <= center + spread)
        ax.fill_between(curve_x[within_one_std], 0, curve_y[within_one_std], facecolor='black', alpha=0.06)

        # Species name at the lower-left corner of the row.
        ax.text(0, .05, species, color="black", fontsize=13, ha="left", va="bottom", transform=ax.transAxes)

    # Negative hspace overlaps the rows; extra bottom margin for the x label.
    g.fig.subplots_adjust(hspace=-.5, bottom=.15)
    g.despine(left=True)
    g.set(title='', ylabel='', yticks=[], xlabel="flipper length (mm)")
    plt.show()
    

    seaborn ridge plot with mean and sdevs