pandas matplotlib plot seaborn visualization

Plotting a multivariable plot in seaborn using a Pandas DataFrame


Code to reproduce the data frame shown:

# Per-segment counts and percentages: one list per column, ten segments (9 down to 0).
dx = pd.DataFrame(
    {
        'SEGM': [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
        'YES': [4812, 2697, 2677, 1811, 1570, 1010, 418, 210, 115, 166],
        'NO': [590, 1368, 2611, 2968, 3942, 3596, 2141, 1668, 1528, 11055],
        'TOT': [5402, 4065, 5288, 4779, 5512, 4606, 2559, 1878, 1643, 11221],
        'YES%': [89.08, 66.35, 50.62, 37.89, 28.48, 21.93, 16.33, 11.18, 7.0, 1.48],
        'TOTCUM%': [11.51, 20.16, 31.43, 41.6, 53.34, 63.15, 68.6, 72.6, 76.1, 100.0],
    }
)
# Bare expression so a notebook/REPL echoes the frame.
dx

Expected plot in Python seaborn/matplotlib; I created it in Excel to show the expected result visually.

Expected plots

I tried the code below, but it is not working:

# Question's attempt (reported as not working): a bar chart of YES per SEGM
# with a text label above every bar, followed by creation of a twin y-axis.
# NOTE(review): cat_col, vol_inperc and df are not defined anywhere in this
# snippet (the frame built above is named dx) - confirm what was intended.
# NOTE(review): x and y are passed positionally here; recent seaborn releases
# may require x=/y= keywords - confirm against the installed version.
g=sns.barplot(dx.SEGM, dx['YES'], alpha=0.8)
# The style is set only after the bar plot has already been created.
sns.set_style("whitegrid", {'axes.grid' : False})
plt.title(f'{cat_col} with {vol_inperc}%', fontsize = 16,color='blue')
plt.ylabel('Volume', fontsize=12)
plt.xlabel(f'{cat_col}', fontsize=12)
plt.xticks(rotation=90)
# Annotate each bar: the text is centred horizontally on the bar and placed
# 3 data units above its top. The label shows the rounded height on the first
# line and height / len(df) * 100 formatted as a percentage on the second.
for p in g.patches:
    height = p.get_height()
    g.text(p.get_x()+p.get_width()/2.,
        height + 3,
        '{}\n{:1.2f}%'.format(round(height),height/len(df)*100),
        ha="center", fontsize=10, color='blue')

# Twin y-axis sharing the bar chart's x-axis; nothing is drawn on it in this snippet.
gt = g.twinx()

Solution

  •     import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    
    # Data: per-segment counts and percentages, one list per column.
    dx = pd.DataFrame(
        data={
            'SEGM': [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
            'YES': [4812, 2697, 2677, 1811, 1570, 1010, 418, 210, 115, 166],
            'NO': [590, 1368, 2611, 2968, 3942, 3596, 2141, 1668, 1528, 11055],
            'TOT': [5402, 4065, 5288, 4779, 5512, 4606, 2559, 1878, 1643, 11221],
            'YES%': [89.08, 66.35, 50.62, 37.89, 28.48, 21.93, 16.33, 11.18, 7.0, 1.48],
            'TOTCUM%': [11.51, 20.16, 31.43, 41.6, 53.34, 63.15, 68.6, 72.6, 76.1, 100.0],
        },
    )
    
    # Plotting: grouped count bars on the primary axis, percentage lines on a
    # twin axis that shares the same x positions.
    fig, ax1 = plt.subplots(figsize=(12, 8))

    bar_width = 0.25
    positions = dx.index

    # One (column, horizontal shift, colour) entry per bar series; shifting by a
    # full bar width places the three bars of a segment side by side.
    bar_specs = (
        ('YES', -bar_width, 'b'),
        ('NO', 0, 'r'),
        ('TOT', bar_width, 'g'),
    )
    bar_groups = [
        ax1.bar(positions + shift, dx[column], bar_width, label=column, color=colour)
        for column, shift, colour in bar_specs
    ]

    # Data labels for the bars: each height is written just above the bar's top edge.
    for group in bar_groups:
        for rect in group:
            top = rect.get_height()
            centre_x = rect.get_x() + rect.get_width() / 2.0
            ax1.text(centre_x, top, f'{top}', ha='center', va='bottom')

    # Primary axis cosmetics: ticks sit at the row positions but show the SEGM values.
    ax1.set_xlabel('SEGM')
    ax1.set_ylabel('Counts')
    ax1.set_xticks(positions)
    ax1.set_xticklabels(dx['SEGM'])
    ax1.legend()

    # Secondary axis for the percentage series.
    ax2 = ax1.twinx()

    # One (column, colour) entry per percentage line.
    line_specs = (
        ('YES%', 'black'),
        ('TOTCUM%', 'purple'),
    )
    for column, colour in line_specs:
        sns.lineplot(x=positions, y=column, data=dx, color=colour, marker='o', ax=ax2, label=column)

    # Data labels for the lines: value plus a percent sign, in the line's own colour.
    for pos in positions:
        for column, colour in line_specs:
            value = dx[column][pos]
            ax2.text(pos, value, f'{value}%', color=colour, ha='center', va='bottom')

    # Secondary axis cosmetics.
    ax2.set_ylabel('Percentage')
    ax2.legend(loc='upper right')

    plt.title('Bar plot of YES, NO, TOT and Line plot of YES% and TOTCUM%')
    plt.show()