I'm experimenting with 1D time-series data and trying to reproduce the following approach as an animation over my own data in a Google Colab notebook.
I ran into a problem reproducing the animation from this post when the x-axis values come from a DataFrame column of dtype 'datetime', i.e. when the x-axis is a timestamp.
I assume there is a bug somewhere, because the problem persists even when I set the timestamp column as the index
and animate the plot with the x-axis values passed via df.index.
Below is the script I have tried, unsuccessfully; it is based on what I learned from the post listed under the reference at the end of this question:
#-----------------------------------------------------------
# Libs
#-----------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.patches import Rectangle
from IPython.display import HTML
#-----------------------------------------------------------
# LOAD THE DATASET
#-----------------------------------------------------------
# CSV is fetched over HTTP every time this cell runs.
DATA_URL = 'https://raw.githubusercontent.com/amcs1729/Predicting-cloud-CPU-usage-on-Azure-data/master/azure.csv'
df = pd.read_csv(DATA_URL)
# Parse the textual timestamps into datetime values.
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Swap the space in each CPU column name for an underscore.
df = df.rename(
    columns={f'{stat} cpu': f'{stat}_cpu' for stat in ('min', 'max', 'avg')}
)
df.head()
# Data preparation
# ==============================================================================
# Only the timestamp and the average CPU column are used from here on.
sliced_df = df[['timestamp', 'avg_cpu']]
# Disabled experiments, kept for reference:
#sliced_df['timestamp'] = pd.to_datetime(sliced_df['timestamp'], format='%Y-%m-%d %H:%M:%S')
#df = df.set_index('timestamp')
# Hold out the final `step_size` rows as the unseen test split.
step_size = 4 * 287
data_train = sliced_df.iloc[:-step_size]
data_test = sliced_df.iloc[-step_size:]  # unseen
#-----------------------------------------------------------
# Animation
#-----------------------------------------------------------
# Figure and axes; the ggplot style applies to everything drawn below.
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 4))

# Raw series: average CPU plotted against the timestamps.
ax.plot(sliced_df['timestamp'].tolist(), sliced_df['avg_cpu'], 'r-', linewidth=0.5, label='data or y')

# Empty lines whose only job is to add "Train"/"Test" entries to the legend.
for legend_label, line_style in (("Train", 'g-'), ("Test", 'b-')):
    ax.plot([], [], line_style, label=legend_label, linewidth=8, alpha=0.3)

# One x tick every `step_size` rows, with the labels rotated vertically.
step_size = 287
selected_ticks = sliced_df['timestamp'].iloc[::step_size]
plt.xticks(selected_ticks, rotation=90)
#plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))

# Extents used for the shaded rectangles created in init().
Y_LIM = 2 * 10**8  #df[f'{name_columns}'].max()
TRAIN_WIDTH = 27 * 288
TEST_WIDTH = 1 * 357
print(TRAIN_WIDTH)
print(TEST_WIDTH)

# Titles and labels (the fully formatted title is left disabled).
#plt.title(f'Data split:\n taraing-set {100*(len(data_train)/len(df)):.2f}% = {TRAIN_WIDTH/288:.2f} days and test-set {100*(len(data_test)/len(df)):.2f}% = {TEST_WIDTH/288:.f} days')
ax.set_title('Data split:\n taraing-set % = days and test-set % = days')
ax.set_ylabel(' usage', fontsize=15)
ax.set_xlabel('Timestamp', fontsize=15)
ax.grid(True)

# Legend is anchored outside the plotting area, to the right.
#plt.legend(loc="upper left")
ax.legend(bbox_to_anchor=(1.3, .9), loc="upper right")
fig.tight_layout(pad=1.2)
def init():
    """Add the green and blue rectangles to ``ax`` and return them.

    The green rectangle starts at x = 0 and is ``TRAIN_WIDTH`` wide; the blue
    one starts where the green one ends and is ``TEST_WIDTH`` wide. Both are
    ``Y_LIM`` tall and start at y = 0.

    NOTE(review): the line drawn on ``ax`` uses timestamps for x, whereas
    these rectangles are placed with plain numbers -- presumably the units
    do not match the axis; confirm.

    Returns:
        list: the two added patches, green first, then blue.
    """
    train_box = Rectangle((0, 0), TRAIN_WIDTH, Y_LIM, alpha=0.3, facecolor='green')
    test_box = Rectangle((TRAIN_WIDTH, 0), TEST_WIDTH, Y_LIM, alpha=0.3, facecolor='blue')
    return [ax.add_patch(box) for box in (train_box, test_box)]
def update(x_start):
    """Slide both rectangles so that the green one starts at ``x_start``.

    Args:
        x_start: new x position of the green rectangle's left edge; the blue
            rectangle is placed ``TRAIN_WIDTH`` further to the right.

    Returns:
        list: the module-level ``patches`` list, handed back to the animation.
    """
    green_box, blue_box = patches[0], patches[1]
    green_box.xy = (x_start, 0)
    blue_box.xy = (x_start + TRAIN_WIDTH, 0)
    return patches
# Build the "Train" and "Test" rectangles once; update() moves them per frame.
patches = init()
# 80 evenly spaced start positions between 0 and 288, one per frame.
frame_starts = np.linspace(0, 288, 80)
ani = FuncAnimation(fig, update, frames=frame_starts, interval=50, blit=True)
# Render the animation as an HTML5 video for display in the notebook.
HTML(ani.to_html5_video())
My current output:
Expected animation output (but with full timestamp):
Reference:
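The problem is that the green and blue patches you want to draw are positioned in the Axes' own coordinates, while the values you pass for them (starting at 0, with widths given as sample counts) are not in the units of your plotted data. You need to translate your values into the coordinates Matplotlib uses for the plot.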
Below you can find a very naive and hacky approach, but it should give you a general idea of how to handle it:
# Figure and axes with the ggplot look.
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 4))

# Raw series: average CPU plotted against the timestamps.
ax.plot(sliced_df['timestamp'].tolist(), sliced_df['avg_cpu'], 'r-', linewidth=0.5, label='data or y')

# Read the axis limits with the margins removed, so the limits are expressed
# in the same coordinates the axes use for the plotted data.
ax.margins(x=0, y=0)
x_min, x_max = ax.get_xlim()
y_min, y_max = ax.get_ylim()
height = y_max - y_min
# Put a small margin back for display.
ax.margins(x=0.05, y=0.05)
def init():
    """Add the green and blue rectangles to ``ax`` and return them.

    Both rectangles start at the bottom of the axis range (``y_min``) and are
    ``height`` tall. The green one begins at ``x_min`` and is 5 units wide;
    the blue one begins right after it and is 3 units wide.

    Returns:
        list: the two added patches, green first, then blue.
    """
    green_box = Rectangle((x_min, y_min), 5, height, alpha=0.3, facecolor='green')
    blue_box = Rectangle((x_min + 5, y_min), 3, height, alpha=0.3, facecolor='blue')
    return [ax.add_patch(box) for box in (green_box, blue_box)]
def update(x_start):
    """Slide both rectangles so that the green one starts at ``x_start``.

    The blue rectangle is placed directly after the green one. Its offset is
    read from the green patch's current width rather than repeated as a
    literal, so the two stay adjacent if the width set in ``init`` changes.

    Args:
        x_start: new x position (in axis coordinates) of the green
            rectangle's left edge.

    Returns:
        list: the module-level ``patches`` list, handed back to the animation.
    """
    green_box, blue_box = patches[0], patches[1]
    green_box.xy = (x_start, y_min)
    blue_box.xy = (x_start + green_box.get_width(), y_min)
    return patches
# Build the two rectangles once; update() moves them on every frame.
patches = init()
# Combined width of the sliding window (green + blue rectangles).
window_width = sum(patch.get_width() for patch in patches)
# Last start position that keeps the whole window inside the plotted range.
# Clamped so the sweep never starts before x_min when the window is wider
# than the data.
last_start = max(x_min, x_max - window_width)
ani = FuncAnimation(
    fig,
    update,
    frames=np.linspace(x_min, last_start, 50),  # all starting points
    interval=150,
    blit=True)
# Render the animation as an HTML5 video for display in the notebook.
HTML(ani.to_html5_video())