I am trying to plot a population pyramid like the one in the linked question. However, I don't want just the histograms; I want probability densities as well (the sum of the densities should be 1). Plotting them separately with matplotlib is pretty easy, but I am unable to adapt similar code to obtain a population pyramid. Here's my MWE of the separate histograms using matplotlib:
import numpy as np
import matplotlib.pyplot as plt
# Two example samples whose distributions are compared.
X = np.arange(1000)
Y = np.arange(1100)

# Common, fixed-width bin edges that cover both samples.
bin_size = 150  # Adjust the bin size as needed
upper_edge = max(X.max(), Y.max()) + bin_size
bins = np.arange(0, upper_edge, bin_size)

fig, axs = plt.subplots(2, 1, sharey=True, sharex=True)

# One (sample, bar colour, subplot title) triple per subplot, top to bottom.
panels = [
    (X, 'black', 'Distribution of X'),
    (Y, 'darkgrey', 'Distribution of Y'),
]

# Draw both normalised histograms before touching the axis scale.
for ax, (sample, colour, _) in zip(axs, panels):
    ax.hist(sample, bins=bins, color=colour, rwidth=0.7, density=True)

# Titles and y-labels on every subplot; the x-label only on the bottom one.
for ax, (_, _, heading) in zip(axs, panels):
    ax.set_title(heading)
    ax.set_ylabel('Probability Density')
axs[-1].set_xlabel('Duration (s)')

# Logarithmic y-axis for each subplot.
for ax in axs:
    ax.set_yscale("log")

# Adjust spacing and show the plot.
plt.tight_layout()
plt.show()
I suggest using `np.histogram` to compute the probability densities.
With those, you can use code similar to that in the question you've linked to draw a population pyramid using seaborn.
Here's how I would do it:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Example samples: two sets of durations (in seconds) to compare.
X = np.arange(1000)
Y = np.arange(1100)

# Shared bin edges so both sides of the pyramid use identical duration bins.
bin_size = 150
bins = np.arange(0, max(X.max(), Y.max()) + bin_size, bin_size)

# density=True normalises each histogram so that its integral is 1, i.e.
# (density * bin_size).sum() == 1; the raw values sum to 1 only for bins of
# unit width.
density_x, _ = np.histogram(X, bins, density=True)
density_y, _ = np.histogram(Y, bins, density=True)

df = pd.DataFrame({
    "bins": [f"{bin_start}–{bin_end} s" for bin_start, bin_end in zip(bins[:-1], bins[1:])],
    # X is negated so that its bars extend to the left of zero.
    "X": -density_x,
    "Y": density_y,
})

# Category order for the bars: reversed, so the last (largest) bin comes first.
bin_order = df["bins"].tolist()[::-1]

fig, ax = plt.subplots(figsize=(8, 4))
# Pass the frame as `data=` so the call does not depend on the position of the
# `data` parameter, which differs between seaborn versions.
sns.barplot(data=df, x="X", y="bins", order=bin_order, ax=ax, color="black")
sns.barplot(data=df, x="Y", y="bins", order=bin_order, ax=ax, color="darkgrey")

# The X bars are only drawn at negative positions; show magnitudes on the
# tick labels so the axis never reports a negative density.
# (Passing a plain function here requires matplotlib >= 3.3.)
ax.xaxis.set_major_formatter(lambda value, _pos: f"{abs(value):g}")

ax.set_title("Population pyramid with densities")
ax.set_xlabel("Probability density (X to the left, Y to the right)")
ax.set_ylabel("Duration bins")
fig.tight_layout()
plt.show()