Data I'm working with: https://drive.google.com/file/d/1xb7icmocz-SD2Rkq4ykTZowxW0uFFhBl/view?usp=sharing
Hey everyone,
I am a bit stuck with editing a plot. Basically, I would like the x-axis to display the months of the year, but it doesn't seem to work, possibly because of the data type of the index. Do you have any idea how I could get my plot to show months on the x-axis?
If you need more context about the data, please let me know!!!
Thank you!
Here's my code for the plot and the initial data modifications:
import matplotlib.pyplot as plt
import mplleaflet
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import numpy as np
# Load the observations and prepare the columns used for plotting.
df = pd.read_csv("data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv")
# 'degrees' is Data_Value divided by 10
# (presumably Data_Value is stored in tenths of a degree -- TODO confirm).
df['degrees']=df['Data_Value']/10
df['Date'] = pd.to_datetime(df['Date'])
# Split the rows at 2015-01-01: df2 holds earlier dates, df3 holds 2015 onward.
df2 = df[df['Date']<'2015-01-01']
df3 = df[df['Date']>='2015-01-01']
# Highest / lowest 'degrees' for every (month, day) pair, so each result is
# indexed by (month, day) rather than by a real date.
max_temp = df2.groupby([(df2.Date.dt.month),(df2.Date.dt.day)])['degrees'].max()
min_temp = df2.groupby([(df2.Date.dt.month),(df2.Date.dt.day)])['degrees'].min()
# Same aggregation for the 2015+ rows; not used by the plotting code below.
max_temp2 = df3.groupby([(df3.Date.dt.month),(df3.Date.dt.day)])['degrees'].max()
min_temp2 = df3.groupby([(df3.Date.dt.month),(df3.Date.dt.day)])['degrees'].min()
# Draw both series as lines on the current axes.
# NOTE(review): max_temp / min_temp are Series, so x= and y= look like they
# have no effect here -- confirm.
max_temp.plot(x ='Date', y='degrees', kind = 'line')
min_temp.plot(x ='Date',y='degrees', kind= 'line')
# Shade between the two lines; the x positions are 0..len-1 (row positions),
# not dates.
plt.fill_between(range(len(min_temp)),min_temp, max_temp, color='C0', alpha=0.2)
ax = plt.gca()
# NOTE(review): the title says 2015, but the plotted series come from df2
# (dates before 2015-01-01).
ax.set(xlabel="Date",
       ylabel="Temperature",
       title="Extreme Weather in 2015")
plt.legend()
plt.tight_layout()
# Rotate the x tick labels by 45 degrees.
x = plt.gca().xaxis
for item in x.get_ticklabels():
    item.set_rotation(45)
plt.show()
Plot I'm getting:
Option 1: change the index to month abbreviations using `Index.map` and the `calendar` module (shown here just for `df2`):
import calendar
import matplotlib.pyplot as plt
import pandas as pd
# Option 1: plot the per-(month, day) record highs and lows for the rows
# dated before 2015, labelling the x-axis with month abbreviations by
# replacing the (month, day) index with strings.
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# Group once by (month, day) and reuse the grouping for both extremes.
daily = df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees']
max_temp = daily.max()
min_temp = daily.min()

# Update the index to be the desired display format for x-axis
# (x is a (month, day) tuple; only the month is kept).
max_temp.index = max_temp.index.map(lambda x: calendar.month_abbr[x[0]])
min_temp.index = min_temp.index.map(lambda x: calendar.month_abbr[x[0]])

# Both Series are named 'degrees', so without explicit labels the legend
# would show two identical entries. Series.plot takes no x=/y= columns, so
# those keywords are dropped.
ax = max_temp.plot(kind='line', label='max')
min_temp.plot(kind='line', label='min', ax=ax)

# The string index is plotted at positions 0..len-1, so shade over the same
# positional range.
ax.fill_between(range(len(min_temp)), min_temp, max_temp,
                color='C0', alpha=0.2)
ax.set(xlabel="Date", ylabel="Temperature", title="Extreme Weather 2005-2014")

# Rotate the x tick labels by 45 degrees.
for item in ax.xaxis.get_ticklabels():
    item.set_rotation(45)

# Remove the horizontal padding so the lines span the full axis width.
ax.margins(x=0)
ax.legend()
plt.tight_layout()
plt.show()
As an aside: the title "Extreme Weather in 2015" is incorrect because this data includes all years before 2015. This is "Extreme Weather 2005-2014"
The year range can be checked with `min` and `max` as well:
# Show the first and last year present in df2.
years = df2['Date'].dt.year
print(f"{years.min()} - {years.max()}")
# 2005 - 2014
The title could be programmatically generated with:
# Keyword argument for the plotting call (e.g. ax.set(...) or .plot(...)),
# replacing the hard-coded title with the first and last year found in df2.
title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
Option 2: simplify the code by using `groupby(...).agg(...)` to create a single DataFrame, then convert the index in the same way as above:
import calendar
import matplotlib.pyplot as plt
import pandas as pd
# Option 2: a single groupby/agg call produces both extremes as the 'max' and
# 'min' columns of one DataFrame, which is then plotted in one call.
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# Daily extremes of 'degrees', keyed by (month, day).
month_key = df2['Date'].dt.month
day_key = df2['Date'].dt.day
df2_temp = df2.groupby([month_key, day_key])['degrees'].agg(['max', 'min'])

# Replace every (month, day) key with the abbreviation of its month; any
# other display format could be produced here instead.
df2_temp.index = df2_temp.index.map(lambda key: f'{calendar.month_abbr[key[0]]}')

# Line plot of both columns; the title is built from the years found in df2.
years = df2['Date'].dt.year
ax = df2_temp.plot(
    kind='line',
    rot=45,
    xlabel="Date",
    ylabel="Temperature",
    title=f"Extreme Weather {years.min()}-{years.max()}",
)

# Shade the band between the two lines (x is positional: 0..len-1).
ax.fill_between(
    range(len(df2_temp)),
    df2_temp['min'],
    df2_temp['max'],
    color='C0',
    alpha=0.2,
)
ax.margins(x=0)
plt.tight_layout()
plt.show()
Option 3: convert the index to datetimes using `pd.to_datetime`. Choose any leap year so that every date shares the same year (it must be a leap year so that Feb 29 does not raise an error). Then call `set_major_formatter` with the format string `%b` to show the month abbreviation:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# Option 3: give the per-(month, day) extremes a real datetime index (all in
# one placeholder year) and let matplotlib's date tools label the months.
df = pd.read_csv("...")
df['degrees'] = df['Data_Value'] / 10
df['Date'] = pd.to_datetime(df['Date'])
df2 = df[df['Date'] < '2015-01-01']

# Get Max and Min Degrees in Single Groupby
df2_temp = (
    df2.groupby([df2.Date.dt.month, df2.Date.dt.day])['degrees']
    .agg(['max', 'min'])
)

# Convert to DateTime of Same Year
# (Must be a leap year so Feb-29 doesn't raise an error)
# Each (month, day) key becomes the string '2000-<month>-<day>'.
df2_temp.index = pd.to_datetime(
    '2000-' + df2_temp.index.map(lambda s: '-'.join(map(str, s)))
)

# Plot
# x_compat=True turns off pandas' automatic tick handling for regular
# time series, so the axis stays in matplotlib date units and the mdates
# locator/formatter set below read the tick values as intended.
ax = df2_temp.plot(
    kind='line', rot=45, x_compat=True,
    xlabel="Date", ylabel="Temperature",
    title=f"Extreme Weather {df2.Date.dt.year.min()}-{df2.Date.dt.year.max()}"
)

# Fill between
ax.fill_between(df2_temp.index, df2_temp['min'], df2_temp['max'],
                color='C0', alpha=0.2)

# One tick at the start of every month, labelled with the month abbreviation
# via the %b format string.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))

# No horizontal padding, so the axis covers exactly the placeholder year.
ax.margins(x=0)
plt.tight_layout()
plt.show()
The benefit of this approach is that the index is a datetime and therefore will format better than the string representations of options 1 and 2.