I am trying to plot data as a function of time (years) from a pandas data frame. A summary of the data is shown here:
DATE WALCL
0 2010-08-18 2313662
1 2010-08-25 2301015
2 2010-09-01 2301996
3 2010-09-08 2305802
4 2010-09-15 2296079
517 2020-07-15 6958604
518 2020-07-22 6964755
519 2020-07-29 6949032
520 2020-08-05 6945237
521 2020-08-12 6957277
I try to plot the data using the following code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Tick helpers: a locator per year, a locator per month, and a 4-digit-year label format.
years = mdates.YearLocator() # every year
months = mdates.MonthLocator() # every month
years_fmt = mdates.DateFormatter('%Y')
# Read the CSV; sPathIn and sFname are not defined anywhere in this snippet.
dfData = pd.read_csv(sPathIn+sFname, skiprows = 0)
# NOTE(review): read_csv is given no parse_dates/index_col and plot() is given no
# x column, so DATE is presumably not what ends up on the x-axis -- consistent with
# the 1970-1971 axis the question reports when set_xlim is omitted. Confirm.
ax = dfData.plot()
# Yearly major ticks labelled with the year, monthly minor ticks.
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)
# Convert the first and last DATE values to year precision; the upper limit is the
# last year plus one year.
datemin = np.datetime64(dfData['DATE'][0], 'Y')
datemax = np.datetime64(dfData['DATE'].iloc[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim( datemin, datemax)
plt.show()
When I run this code, the plot axes are displayed correctly but the time series data (WALCL) does not appear.
If I omit ax.set_xlim( datemin, datemax), the time series data are shown, but the x-axis is no longer formatted correctly (it starts at 1970 and runs until 1971).
Here is a modified code example:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Tick helpers used further down: yearly locator, monthly locator, 4-digit-year labels.
years = mdates.YearLocator() # every year
months = mdates.MonthLocator() # every month
years_fmt = mdates.DateFormatter('%Y')
# Input location: directory (ending in a path separator) and file name; the two are
# concatenated as strings where the file is read.
sPathIn = "C:\\Users\\reg\\projects\\notes\\Political_Economy\\S&P+Fed-Assets\\"
sFname = "WALCL.csv"
and here is the traceback:
Traceback (most recent call last):
File "C:\Users\reg\projects\Notes\Political_Economy\S&P+Fed-Assets\Python\s&p-fed-assets-v0.2.3.py", line 25, in <module>
dfData.set_index('DATE', inplace=True)
File "C:\Users\reg\Anaconda3\lib\site-packages\pandas\core\frame.py", line 4545, in set_index
raise KeyError(f"None of {missing} are in the columns")
KeyError: "None of ['DATE'] are in the columns"
# Load the data, parsing DATE as datetimes and making it the index (index_col='DATE').
dfData = pd.read_csv(sPathIn+sFname, skiprows = 0, parse_dates=['DATE'], index_col='DATE')
# Set up the plot.
# NOTE(review): read_csv above already made DATE the index, so it is no longer a
# column; this second set_index is the line that raises the KeyError in the traceback.
dfData.set_index('DATE', inplace=True)
# NOTE(review): this call and the dfData['DATE'] lookups below also treat DATE as a
# column, so they would presumably fail the same way -- confirm.
ax = dfData.plot('DATE', 'WALCL')
# Format the ticks: yearly major ticks labelled with the year, monthly minor ticks.
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)
# Convert the first and last DATE values to year precision; the upper limit is the
# last year plus one year.
datemin = np.datetime64(dfData['DATE'][0], 'Y')
datemax = np.datetime64(dfData['DATE'].iloc[-1], 'Y') + np.timedelta64(1, 'Y')
ax.set_xlim( datemin, datemax)
plt.show()
Verify that the DATE column is in a datetime format by using parse_dates with .read_csv.
Set DATE as the index:
import pandas as pd
import numpy as np
# Read the CSV with DATE parsed as datetimes and used as the DataFrame index
dfData = pd.read_csv(
    'WALCL.csv',
    skiprows=0,
    parse_dates=['DATE'],
    index_col='DATE',
)

# Plot the frame against its datetime index
ax = dfData.plot(figsize=(20, 8))

# x-axis limits at year precision: year of the earliest date up to
# the year after the latest date
datemin = np.datetime64(dfData.index.min(), 'Y')
datemax = np.timedelta64(1, 'Y') + np.datetime64(dfData.index.max(), 'Y')
ax.set_xlim(datemin, datemax)
Leave DATE as a column:
import pandas as pd
# Load the CSV, parsing DATE as datetimes but keeping it as an ordinary column
dfData = pd.read_csv(
    'WALCL.csv',
    skiprows=0,
    parse_dates=['DATE'],
)
# Plot WALCL against DATE by naming the x and y columns explicitly
ax = dfData.plot(x='DATE', y='WALCL', figsize=(20, 8))