I'm plotting date vs. frequency horizontal bar charts, as subplots, that compare the monthly distribution pattern over time for a selection of crimes. The problem is that the tick labels of the y-axis, which represents the date, display every month over the period 2006-2023. I want to display only the year instead, whilst preserving the monthly counts in the plot. Basically, change the scale of the tick labels from month to year without changing the data being plotted.
Here's a sample of my code below:
Dataset: https://drive.google.com/file/d/11MM-Vao6_tHGTRMsLthoMGgtziok67qc/view?usp=sharing
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Load the arrests and collapse every arrest date onto its month (as a timestamp).
df = pd.read_csv('NYPD_Arrests_Data__Historic__20250113_111.csv')
df['ARREST_DATE'] = pd.to_datetime(df['ARREST_DATE'], format='%m/%d/%Y')
df['ARREST_MONTH'] = df['ARREST_DATE'].dt.to_period('M').dt.to_timestamp()

# Offences to compare, the attribute(s) to stack the bars by, and the
# display name used for each attribute.
crimes = [
    'DANGEROUS DRUGS',
    'DANGEROUS WEAPONS',
    'ASSAULT 3 & RELATED OFFENSES',
    'FELONY ASSAULT',
]
attributes = ['PERP_RACE']
titles = ['Race']

# One figure per attribute, one subplot per offence.
for attr, title in zip(attributes, titles):
    fig, axes = plt.subplots(
        1,
        len(crimes),
        figsize=(4 * len(crimes), 6),
        sharey='row',
    )

    for i, crime in enumerate(crimes):
        ax = axes[i]
        crime_df = df[df['OFNS_DESC'] == crime]

        # Rows are months, columns are the attribute's categories,
        # cells are arrest counts.
        pivot = pd.crosstab(crime_df['ARREST_MONTH'], crime_df[attr])

        # Stacked horizontal bars, one bar per month.
        pivot.plot(kind='barh', stacked=True, ax=ax, width=0.9, legend=False)

        ax.set_title(crime)
        ax.set_xlabel('Frequency')
        # Only the left-most subplot carries the y-axis label.
        ax.set_ylabel('Month' if i == 0 else '')
        ax.xaxis.set_tick_params(labelsize=8)
        ax.set_yticks(ax.get_yticks())

    # One common legend across all subplots, built from the last axes drawn.
    handles, labels = ax.get_legend_handles_labels()
    fig.legend(
        handles,
        labels,
        title=title,
        loc='upper center',
        ncol=len(df[attr].unique()),
        bbox_to_anchor=(0.5, 0.94),
    )
    fig.suptitle(f'Crime Frequency Distribution by Year and {title}', fontsize=20)
    # Leave the top 10% of the figure free for the suptitle and legend.
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    plt.show()
pandas makes the assumption that the major axis of a bar-chart is always categorical, and therefore converts your values to strings prior to plotting. This means that it forces matplotlib to render a label for every bar you have.
The way to do this with minimal changes to your code would be to manually override the yticklabels
with your own custom ones. You can create a Series that contains the year (as a string) whenever the year in the current row is different from that of the previous row (i.e. on the first month of each year), and an empty string whenever the year of the current row is the same as that of the previous row.
import pandas as pd
# Four years, each repeated three times, standing in for consecutive months.
s = pd.Series([2000, 2001, 2002, 2003]).repeat(3)

# True only on the first row of each run of equal years
# (the first row compares against NaN, so it is kept as well).
starts_new_year = s != s.shift()

# Keep the year as text where a new run starts; blank out the repeats.
filtered = s.astype('string').where(starts_new_year, '')

print(pd.DataFrame({'orig': s, 'filtered': filtered}))
#    orig filtered
# 0  2000     2000
# 0  2000
# 0  2000
# 1  2001     2001
# 1  2001
# 1  2001
# 2  2002     2002
# 2  2002
# 2  2002
# 3  2003     2003
# 3  2003
# 3  2003
Putting this into action in your code would look like:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Load the arrests and collapse every arrest date onto its month (as a timestamp).
df = pd.read_csv('NYPD_Arrests_Data__Historic__20250113_111.csv')
df['ARREST_DATE'] = pd.to_datetime(df['ARREST_DATE'], format='%m/%d/%Y')
df['ARREST_MONTH'] = df['ARREST_DATE'].dt.to_period('M').dt.to_timestamp()

# crimes, attributes and renames
crimes = ['DANGEROUS DRUGS', 'DANGEROUS WEAPONS', 'ASSAULT 3 & RELATED OFFENSES', 'FELONY ASSAULT']
attributes = ['PERP_RACE']
titles = ['Race']

# pandas draws bar charts on a categorical axis: bar k sits at position k,
# whatever its date is. Because the subplots share the y-axis, every crosstab
# must therefore contain exactly the same months in the same order; otherwise a
# month missing for one crime would shift that panel's later bars against the
# others. Build the complete month range once and align every crosstab to it.
all_months = pd.date_range(
    df['ARREST_MONTH'].min(), df['ARREST_MONTH'].max(), freq='MS'
)

# Label a bar with its year only where the year differs from the previous
# bar's year (i.e. on the first month of each year); leave the rest blank.
yticklabels = (
    all_months.year.to_series()
    .pipe(lambda s: s.astype('string').where(s != s.shift(), ''))
)

# loops plot creation over each attribute
for attr, title in zip(attributes, titles):
    # The same category order in every panel keeps the colours consistent and
    # guarantees that the legend taken from the first panel is complete.
    categories = sorted(df[attr].dropna().unique())

    # squeeze=False always returns a 2-D array of axes, so a single-crime
    # selection works too.
    fig, axes = plt.subplots(
        1, len(crimes), figsize=(4 * len(crimes), 6), sharey='row', squeeze=False
    )

    for i, crime in enumerate(crimes):
        ax = axes.flat[i]
        crime_df = df[df['OFNS_DESC'] == crime]
        pivot = (
            pd.crosstab(crime_df['ARREST_MONTH'], crime_df[attr])
            .reindex(index=all_months, columns=categories, fill_value=0)
        )

        # plots stacked horizontal bars
        pivot.plot(kind='barh', stacked=True, ax=ax, width=0.9, legend=False)

        ax.set_title(crime)
        ax.set_xlabel('Frequency')
        ax.set_ylabel('Month' if i == 0 else '')  # shows the y-axis label only on first plot
        ax.xaxis.set_tick_params(labelsize=8)

        # Hide the tick marks (one per month) and replace the per-month labels
        # that pandas generated with the sparse year labels.
        ax.yaxis.set_tick_params(size=0)
        ax.set_yticklabels(yticklabels)

    # The y-axis is shared, so inverting it on one subplot inverts all of them;
    # dates then increase from the top of the chart to the bottom.
    axes.flat[0].invert_yaxis()

    # adds one common legend across plots
    handles, labels = axes.flat[0].get_legend_handles_labels()
    fig.legend(handles, labels, title=title, loc='upper center', ncol=len(labels), bbox_to_anchor=(0.5, 0.94))
    fig.suptitle(f'Crime Frequency Distribution by Year and {title}', fontsize=20)
    # Leave the top 10% of the figure free for the suptitle and legend.
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    plt.show()
Note that I also inverted the y-axis so that the dates increase as the viewer's eyes move down the chart.
This is done with the axes.flat[0].invert_yaxis()
line (it inverts the axis on all charts, since they share the y-axis).