I am trying to use matplotlib to recreate a chart that was originally made in Excel, based on the table below.
Category | %_total_dist_1 | event_rate_%_1 | %_total_dist_2 | event_rate_%_2 |
---|---|---|---|---|
00 (-inf, 0.25) | 5.7 | 36.5 | 5.8 | 10 |
01 [0.25, 4.75) | 7 | 11.2 | 7 | 11 |
02 [4.75, 6.75) | 10.5 | 5 | 10.5 | 4.8 |
03 [6.75, 8.25) | 13.8 | 3.9 | 13.7 | 4 |
04 [8.25, 9.25) | 9.1 | 3.4 | 9.2 | 3.1 |
05 [9.25, 10.75) | 14.1 | 2.5 | 14.2 | 2.4 |
06 [10.75, 11.75) | 13.7 | 1.6 | 13.7 | 1.8 |
07 [11.75, 13.75) | 16.8 | 1.3 | 16.7 | 1.3 |
08 [13.75, inf) | 9.4 | 1 | 9.1 | 1.3 |
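The problem I am facing is that the two bar series overlap instead of sitting side by side, the x-axis labels are not rotated, and there are no horizontal gridlines on the y-axis.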
Here is the code I used:
import pandas as pd
import matplotlib.pyplot as plt
# Source table: one row per category bin, holding the distribution share and
# the event rate for each of the two series.
data = {
    "Category": [
        "00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)",
        "03 [6.75, 8.25)", "04 [8.25, 9.25)", "05 [9.25, 10.75)",
        "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)",
    ],
    "%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
    "event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
    "%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
    "event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3],
}
df = pd.DataFrame(data)

# Primary (left) y-axis: the two distribution series drawn as bars.
# Both series are drawn at the same category positions.
fig, ax1 = plt.subplots(figsize=(10, 6))
for dist_col, bar_color in (("%_total_dist_1", "b"), ("%_total_dist_2", "g")):
    ax1.bar(df["Category"], df[dist_col], alpha=0.7, label=dist_col, color=bar_color)
ax1.set_ylabel("% Distribution", color="b")
ax1.tick_params(axis="y", labelcolor="b")

# Secondary (right) y-axis sharing the same x-axis: the two event-rate lines.
ax2 = ax1.twinx()
for rate_col, line_color in (("event_rate_%_1", "r"), ("event_rate_%_2", "orange")):
    ax2.plot(df["Category"], df[rate_col], marker="o", label=rate_col, color=line_color)
ax2.set_ylabel("Event Rate (%)", color="r")
ax2.tick_params(axis="y", labelcolor="r")

# Layout, title and a single figure-level legend covering both axes.
fig.tight_layout()
plt.title("Percentage Distribution and Event Rate")
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha="right")

# Show the plot
plt.show()
To fix the overlapping bars, give each bar series an x-offset equal to half the bar width, in opposite directions (for example `x + 0.1` and `x - 0.1` for a width of `0.2`). This places the two bars side by side, centred on the category position, without overlapping. To rotate the x-axis labels, call `plt.xticks(...)` before creating `ax2`, because the x-axis labels come from the first axes (`ax1`). Finally, to create the gridlines on the y-axis, include `ax1.grid(which='major', axis='y', linestyle='--', zorder=1)`. Make sure to set the `zorder` parameter to 1 in this line and to 2 when creating the bars and lines. This ensures that the gridlines are in the background and don't show up on top of the bars.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Create a Pandas DataFrame with your data
data = {
    "Category": [
        "00 (-inf, 0.25)", "01 [0.25, 4.75)", "02 [4.75, 6.75)",
        "03 [6.75, 8.25)", "04 [8.25, 9.25)", "05 [9.25, 10.75)",
        "06 [10.75, 11.75)", "07 [11.75, 13.75)", "08 [13.75, inf)",
    ],
    "%_total_dist_1": [5.7, 7, 10.5, 13.8, 9.1, 14.1, 13.7, 16.8, 9.4],
    "event_rate_%_1": [36.5, 11.2, 5, 3.9, 3.4, 2.5, 1.6, 1.3, 1],
    "%_total_dist_2": [5.8, 7, 10.5, 13.7, 9.2, 14.2, 13.7, 16.7, 9.1],
    "event_rate_%_2": [10, 11, 4.8, 4, 3.1, 2.4, 1.8, 1.3, 1.3],
}
df = pd.DataFrame(data)

# Create a figure and primary y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# Numeric centre of each category group (0, 1, 2, ...). Bars and lines are all
# positioned relative to these values so that they line up on the shared x-axis.
x = np.arange(len(df['Category']))
# Width of a single bar; each series is shifted by half of this so the two bars
# of a category touch at the group centre instead of overlapping.
bar_width = 0.2

# THIS LINE MAKES THE HORIZONTAL GRID LINES ON THE PLOT
ax1.grid(which='major', axis='y', linestyle='--', zorder=1)

# THIS PLOTS THE BARS NEXT TO EACH OTHER INSTEAD OF OVERLAPPING
ax1.bar(x + bar_width / 2, df['%_total_dist_1'], width=bar_width, alpha=1.0,
        label="%_total_dist_1", color='b', zorder=2)
ax1.bar(x - bar_width / 2, df['%_total_dist_2'], width=bar_width, alpha=1.0,
        label="%_total_dist_2", color='g', zorder=2)
ax1.set_ylabel('% Distribution', color='b')
ax1.tick_params(axis='y', labelcolor='b')

# THIS LINE LABELS AND ROTATES THE X-AXIS TICKS
# The bars sit at numeric positions, so the category names are attached to the
# ticks explicitly. This must happen before ax2 is created, while ax1 is still
# the current axes that plt.xticks acts on.
plt.xticks(x, df['Category'], rotation=45, ha="right")

# Create a secondary y-axis
ax2 = ax1.twinx()

# Plot event rate on the secondary y-axis, against the same numeric positions
# as the bar groups so the markers sit at the centre of each category.
ax2.plot(x, df['event_rate_%_1'], marker='o', label='event_rate_%_1', color='r', zorder=2)
ax2.plot(x, df['event_rate_%_2'], marker='o', label='event_rate_%_2', color='orange', zorder=2)
ax2.set_ylabel('Event Rate (%)', color='r')
ax2.tick_params(axis='y', labelcolor='r')

# Adding title and legend
plt.title('Percentage Distribution and Event Rate')
fig.legend(loc="upper left", bbox_to_anchor=(0.15, 0.85))

# Run tight_layout only after the title and rotated tick labels exist, so the
# layout reserves room for them instead of clipping them at the figure edge.
fig.tight_layout()

# Show the plot
plt.show()