I'm trying to figure out how to create a candlestick chart from data produced by the pandas `resample` method. But let's start with this simple example:
import pandas as pd
from datetime import datetime
from bokeh.plotting import figure, show
# Daily sample already in open/close/low/high form: one entry per day,
# 1-3 January 2025.
data = dict(
    time=[datetime(2025, 1, day) for day in (1, 2, 3)],
    open=[10, 40, 20],
    close=[40, 20, 30],
    low=[5, 10, 20],
    high=[40, 50, 35],
)
df = pd.DataFrame(data)

# Boolean row masks: `inc` marks days that close above their open, `dec`
# marks days that close below it. A flat day is in neither mask.
inc = df['close'].gt(df['open'])
dec = df['close'].lt(df['open'])
# Candlestick chart positioned on the frame's integer row index.
p = figure()

# Wicks: one vertical line per row, running from the high to the low.
p.segment(x0=df.index, y0=df.high, x1=df.index, y1=df.low, color="black")

# Bodies: bars 0.6 index units wide spanning open to close.
body_width = 0.6
# Falling days get a filled bar.
p.vbar(
    x=df.index[dec],
    width=body_width,
    top=df.open[dec],
    bottom=df.close[dec],
    color="#eb3c40",
)
# Rising days get a white bar with a coloured outline.
p.vbar(
    x=df.index[inc],
    width=body_width,
    top=df.open[inc],
    bottom=df.close[inc],
    fill_color="white",
    line_color="#49a3a3",
    line_width=2,
)

show(p)
This gives me an expected chart:
But let's say that I want to create the same chart, but my data is in a different format. In this format I don't have specific open/close/low/high values; I just have an array of dates and values, like here:
# Intraday samples: four readings per day (06:00, 10:00, 14:00, 18:00)
# on each of 1-3 January 2025, in chronological order.
data = {
    'time': [
        datetime(2025, 1, day, hour)
        for day in (1, 2, 3)
        for hour in (6, 10, 14, 18)
    ],
    'price': [
        10, 5, 40, 40,    # 1 January
        40, 10, 50, 20,   # 2 January
        20, 20, 35, 30,   # 3 January
    ],
}
I know that I can use the `resample` method to group these values by day.
# Load the raw (time, price) samples into a frame.
df = pd.DataFrame(data)
# Group the rows into daily ('D') bins keyed by the 'time' column.
resampler = df.resample(rule='D', on='time', kind='period')
Now I can access "open" as `resampler.first()`, "close" as `resampler.last()`, "low" as `resampler.min()` and "high" as `resampler.max()`. So it looks like I have all I need, but I'm struggling to put it together to draw a chart. How can I do it? What would be the equivalent of `df.index` here?
Use `df.resample` + `Resampler.agg` with named aggregation and `df.reset_index`:
df = pd.DataFrame(data)

# Named aggregation: output column -> (source column, reducer).
ohlc_aggs = {
    'open': ('price', 'first'),
    'close': ('price', 'last'),
    'low': ('price', 'min'),
    'high': ('price', 'max'),
}
daily = df.resample('D', on='time').agg(**ohlc_aggs)
# Drop the date index so the rows are numbered 0, 1, 2, ... and can be
# plotted at integer x positions.
df_r = daily.reset_index(drop=True)

# Row masks for days that closed above / below their open.
inc = df_r['close'] > df_r['open']
dec = df_r['open'] > df_r['close']

p = figure()
# Wicks from high to low.
p.segment(x0=df_r.index, y0=df_r.high, x1=df_r.index, y1=df_r.low,
          color="black")
# Bodies for falling days.
p.vbar(x=df_r.index[dec], width=0.6, top=df_r.open[dec],
       bottom=df_r.close[dec], color="#eb3c40")
# Bodies for rising days.
p.vbar(x=df_r.index[inc], width=0.6, top=df_r.open[inc],
       bottom=df_r.close[inc], fill_color="white",
       line_color="#49a3a3", line_width=2)
show(p)
Result:
Intermediate
# `df_r` before `reset_index`: df.resample('D', on='time').agg(...)
open close low high
time
2025-01-01 10 40 5 40
2025-01-02 40 20 10 50
2025-01-03 20 30 20 35
Note that the `kind` parameter of `df.resample` is deprecated since pandas 2.2.0. Just use `datetime`, which you already have.
To get proper dates on the x-axis:
- Set `width` for `vbar` in milliseconds (0.6 * 24 hours, to mimic your initial width).
- Pass `x_axis_type='datetime'` inside `figure`.
- Use `DaysTicker` and `DatetimeTickFormatter` to get dates for the major x-axis ticks.
- Skip `df.reset_index`.
from bokeh.models import DatetimeTickFormatter, DaysTicker

# Aggregate the intraday prices into one open/close/low/high row per day.
# The resampled datetime index is kept (no reset_index) so it can be used
# directly as the x coordinate on a datetime axis.
df = pd.DataFrame(data)
keys = ['open', 'close', 'low', 'high']
values = ['first', 'last', 'min', 'max']
df_r = (
    df.resample('D', on='time')
    .agg(**{k: ('price', v) for k, v in zip(keys, values)})
)

# Row masks for days that closed above / below their open.
inc = df_r.close > df_r.open
dec = df_r.open > df_r.close

# On a datetime axis the bar width is given in milliseconds:
# 0.6 of a day = 0.6 * 24 h * 60 min * 60 s * 1000 ms.
w = 0.6 * 24 * 60 * 60 * 1000

p = figure(x_axis_type='datetime')

# Allow a major tick on every day of the month (1-31), so the ticks stay
# correct wherever in the month the data falls and however many raw rows
# each day contains.
p.xaxis.ticker = DaysTicker(days=list(range(1, 32)))
p.xaxis.formatter = DatetimeTickFormatter(days='%Y-%m-%d')

# Wicks from high to low.
p.segment(x0=df_r.index, y0=df_r.high, x1=df_r.index, y1=df_r.low,
          color="black")
# Bodies for falling days.
p.vbar(x=df_r.index[dec], width=w, top=df_r.open[dec],
       bottom=df_r.close[dec], color="#eb3c40")
# Bodies for rising days.
p.vbar(x=df_r.index[inc], width=w, top=df_r.open[inc],
       bottom=df_r.close[inc], fill_color="white",
       line_color="#49a3a3", line_width=2)
show(p)
Result: