Geometric Brownian motion (gBM) is a stochastic process and can be thought of as an extension of standard Brownian motion.
I am trying to write a function that simulates different paths (ntraj
paths) of gBM and then plots a histogram at certain points specified in a list tcheck
. Once it has plotted these plots the function is meant to superimpose a lognormal distribution on the plot for each time.
The output is meant to look like this
except for gBM rather than a standard Brownian motion process. So far I have a function to generate multiple paths of gBM as,
def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
'''
DOCSTRING:
1D geomwtric brownian motion
INPUTS:
ntraj = "number of trajectories"
n = "length of a trajectory"
T = "last time point, i.e final tradjectory t = {0,1,...,T}"
sigma= volatility
mu= percentage drift
'''
np.random.seed(52323)
S_0 = 0
# Discretize, dt = time step = $t_{j+1}- t_{j}$
dt = T/(n)
sqrtdt = np.sqrt(dt)
# Container for different colors for each trajectory
colors = plt.cm.jet(np.linspace(0,1,nTraj))
# Container for trajectories
xtraj=np.zeros(n+1,float)
ztraj=np.zeros(n+1,float)
trange=np.linspace(start = 0,stop = T ,num = n+1)
# Simulation
# Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1)
for j in range(nTraj):
# Loop over time
for i in range(n):
xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu
# Loop again over time in order to make geometric drift
ztraj = S_0 * np.exp(xtraj) # ztraj[z+1]= ztraj[0]+ np.exp(xtraj[z])
plt.plot(trange , xtraj,'b-',alpha=0.2, color=colors[j], lw=3.0,label="$\sigma$={}, $\mu$={:.5f}".format(sigma,mu))
plt.title("1D Geometric Brownian Motion:\n nTraj={}, T={},\n $\Delta t$={:.3f}, $\sigma$={}, $\mu$={:.3f}".format(nTraj,T,dt,sigma,mu))
plt.xlabel(r'$t$')
plt.ylabel(r'$Z_t$');
oneDGeometricBM(nTraj=5,n=10**3,T=10.0,sigma=0.8,mu=1.1)
I have seen many answers to questions on how to plot multiple paths of gBM but I am interested in how to get look at the histograms at specific times and then look at the distribution. Below is my function so far. It is not working but I am not able to figure out what I am doing wrong. I also added the output I got.
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.stats import norm, lognorm
ntraj = 10000
S_0 =0
sigma=1
mu=1
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 1.0
'''
ntraj = 10**4
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 5.0 # limits
'''
n=int(tfinal/dt)
sqrtdt = np.sqrt(dt)
x=np.zeros(shape=[ntraj,n+1], dtype=float)
z=np.zeros(shape=[ntraj,n+1], dtype=float)
zrange=np.arange(start=-xv, stop=xv, step=dt)
# Calculate the number of the bins
binval = math.ceil(np.sqrt(ntraj))
# Nested for loop to create Drifted BM
for i in range(n):
for j in range(ntraj):
x[j,i+1]=x[j,i]+ sqrtdt*np.random.randn()
#Nested loop to create gBM
for j0 in range(ntraj):
for i0 in range(n+1):
z[j0,i0] = 0 + np.exp(x[j0,i0])
# Loop to plot the distribution of gBM tradjectories at different times
for i1 in range(n):
# Compute histogram at every tsample , sample at time t
t=(i1+1)*dt
if t in tcheck:
# Plot histogram on sample
plt.hist(z[:,i1],bins=30,density=False,alpha=0.6,label=['t ={}'.format(t)] )
# Superimpose each samples mean
xbar = np.average(z[:,i1])
plt.axvline(xbar, color='RED', linestyle='dashed', linewidth=2)
# Plot theoretic distribution { N(0, sqrt[t] ) }
#plt.plot(xrange,norm.pdf(xrange,0.0,np.sqrt(t)),'k--')
So to summarize my question. I am trying to simulate multiple trajectories of gBM, store my results in an array then loop over this array and plot a histogram over the specific points using matplotlib then lastly superimpose a lognormal distribution on my histograms.
Edit 1:
I need to superimpose the lognormal distribution on the GBM and the Cauchy if possible. My problem is that when I edited the correction by @Paul Harris I got,
def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
'''
DOCSTRING:
INPUTS:
ntraj = "number of trajectories"
n = "length of a trajectory"
T = "last time point, i.e final tradjectory t = {0,1,...,T}"
sigma= volatility
mu= percentage drift
'''
np.random.seed(52323)
S0 = 10
# Discretize, dt = time step = $t_{j+1}- t_{j}$
dt = T/(n)
sqrtdt = np.sqrt(dt)
# Container for different colors for each trajectory
colors = plt.cm.jet(np.linspace(0,1,nTraj))
# Container for trajectories
xtraj=np.zeros(n+1,float)
ztraj=np.zeros(n+1,float)
trange=np.linspace(start = 0,stop = T ,num = n+1)
out = []
# Simulation
# Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1)
for j in range(nTraj):
# Loop over time
for i in range(n):
xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu
# Loop again over time in order to make geometric drift
ztraj = S0 * np.exp(xtraj)
# Return gBM
return ztraj
# Plotting
fig, ax = plt.subplots(ncols=2, figsize=plt.figaspect(1./2))
colors = ['k', 'r', 'b']
T = [1.0, 2.0, 5.0]
sigma=0.8
mu=1.1
for c, T in zip(colors, T):
ztraj = oneDGeometricBM(nTraj=5,n=10**4,T=T,sigma=0.8,mu=1.1)
# Plot Emperical Values
xrange = range(0,80,1)
ax[0].hist(ztraj, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(0, 95))
# Plot the theoretical values
theoretic_mean = math.exp(mu * T + 0.5 * sigma**2 * T)
theoretic_var = math.exp(2* mu * T + sigma**2 * T)* (math.exp(sigma**2 * T) - 1)
ax[0].plot(xrange,lognorm.pdf(xrange, theoretic_mean , theoretic_var ),'k--')
# Plot the differences between consecutive elements of gBM (an array)
diff = np.ediff1d(ztraj)
ax[1].hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-5, 5))
ax[0].set_xlabel('z')
ax[0].set_ylabel('$p(z,T)$')
ax[0].set_title('Histogram of ztraj positions')
ax[1].set_xlabel('dz')
ax[1].set_ylabel('$p(dz,T)$')
ax[1].set_title('Histogram of d(ztraj) positions\nbetween time steps')
ax[0].legend()
fig.tight_layout()
So to summarize I need to superimpose the distribution at each time point, the theoretical distribution for the gBM which is the lognormal distribution.
So I have had a look at your problem. I have edited your function to stop plotting and return xtraj
which I assume is your Brownian motion:
def oneDGeometricBM(nTraj=100,n=100,T=1.0,sigma=1,mu=0):
'''
DOCSTRING:
1D geomwtric brownian motion
INPUTS:
ntraj = "number of trajectories"
n = "length of a trajectory"
T = "last time point, i.e final tradjectory t = {0,1,...,T}"
sigma= volatility
mu= percentage drift
'''
np.random.seed(52323)
S_0 = 10
# Discretize, dt = time step = $t_{j+1}- t_{j}$
dt = T/(n)
sqrtdt = np.sqrt(dt)
# Container for different colors for each trajectory
colors = plt.cm.jet(np.linspace(0,1,nTraj))
# Container for trajectories
xtraj=np.zeros(n+1,float)
ztraj=np.zeros(n+1,float)
trange=np.linspace(start = 0,stop = T ,num = n+1)
out = []
# Simulation
# Random Variable $X_{n}$ is distributed np.sqrt(dt)* N(mu=0,sigma=1)
for j in range(nTraj):
# Loop over time
for i in range(n):
xtraj[i+1]=xtraj[i]+ sqrtdt * np.random.randn() + dt*mu
# Loop again over time in order to make geometric drift
ztraj = S_0 * np.exp(xtraj) # ztraj[z+1]= ztraj[0]+ np.exp(xtraj[z])
return ztraj
The displacement per time step is then the differences within the array xtraj: dx = np.ediff1d(oneDGeometricBM(...))
, so we compute a histogram of those values:
fig, ax = plt.subplots()
ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=10.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, label='T=10', density=True)
ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=1.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, color='k', label='T=1', density=True)
ax.hist(np.ediff1d(oneDGeometricBM(nTraj=5,n=10**3,T=5.0,sigma=0.8,mu=1.1)), bins=50, alpha=0.5, color='r', label='T=5', density=True)
ax.set_xlabel('x')
ax.set_ylabel('$p(x,T)$')
ax.legend()
I have used 3 different T
values, as in the example. To normalise the histogram such that the y-axis now represents probability p(x,T)
, ie. sum of all p*x = 1
, we use the density=True
argument.
EDIT
I have edited the oneDGeometricBM
function to return ztraj = S0*np.exp(xtraj)
. Your initial S0
value was 0
, so I have made it non-zero.
You can plot ztraj
differences as:
fig, ax = plt.subplots()
colors = ['k', 'r', 'b']
T = [1.0, 2.0, 5.0]
for c, T in zip(colors, T):
ztraj = oneDGeometricBM(nTraj=5,n=10**3,T=T,sigma=0.8,mu=1.1)
diff = np.ediff1d(ztraj)
ax.hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-10, 10))
ax.set_xlabel('x')
ax.set_ylabel('$p(x,T)$')
ax.legend()
EDIT2
From looking more closely at your produced histograms I think your modelling was correct, just the xrange of the plot should be adjusted as ztraj
gets large for large T
, you can limit the histogram using the range
argument. So I have plotted ztraj
and d(ztraj)
for three separate T
. ztraj
does appear to approximately follow a log-normal distribution and the difference in ztraj
appears to approximately follow a Lorentzian distribution (have to check theory on that one, maybe Gaussian). Code to reproduce:
fig, ax = plt.subplots(ncols=2, figsize=plt.figaspect(1./2))
colors = ['k', 'r', 'b']
T = [1.0, 2.0, 5.0]
for c, T in zip(colors, T):
ztraj = oneDGeometricBM(nTraj=5,n=10**4,T=T,sigma=0.8,mu=1.1)
ax[0].hist(ztraj, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(0, 95))
diff = np.ediff1d(ztraj)
ax[1].hist(diff, bins=100, alpha=0.5, label=f'T={T}', density=True, color=c, range=(-5, 5))
ax[0].set_xlabel('z')
ax[0].set_ylabel('$p(z,T)$')
ax[0].set_title('Histogram of ztraj positions')
ax[1].set_xlabel('dz')
ax[1].set_ylabel('$p(dz,T)$')
ax[1].set_title('Histogram of d(ztraj) positions\nbetween time steps')
ax[0].legend()
fig.tight_layout()
And here is your data and plot but limiting the histogram range=(0, 10)
:
EDIT3
I have included code to fit the lognormal distributions and shown them on your original plot. We define the lognormal function as:
from scipy.optimize import curve_fit
def lognorm(x, x0, A, sigma):
return A * np.exp(-(np.log(x)-x0)**2 / (2*sigma**2))
and then fit using the values and bins from the histogram in the final loop as:
# Loop to plot the distribution of gBM tradjectories at different times
for i1 in range(n):
# Compute histogram at every tsample , sample at time t
t=(i1+1)*dt
if t in tcheck:
# Plot histogram on sample
v, b, patches = plt.hist(z[:,i1],bins=200,density=False,alpha=0.6,label=['t ={}'.format(t)], range=(0, 10) )
# second term is bin centre locations rather than bin edges
popt, pcov = curve_fit(lognorm, b[:-1] + np.ediff1d(b), v, p0=(0.1, 300, 0.3))
# make colors match their original data but no transparency
plt.plot(b, lognorm(b, *popt), color=patches[0].get_facecolor()[:3])
print(f'tcheck: {t} with parameters: {popt}')
Output:
tcheck: 0.5 with parameters: [ -0.42334757 358.38545736 0.6748076 ]
tcheck: 1.0 with parameters: [ -0.90719967 321.03944864 0.96137893]
tcheck: 4.0 with parameters: [ -3.66426932 721.41708932 1.86376987]
EDIT4
The whole code to generate the above output would be:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy.stats import norm, lognorm
from scipy.optimize import curve_fit
def lognorm(x, x0, A, sigma):
return A * np.exp(-(np.log(x)-x0)**2 / (2*sigma**2))
ntraj = 10000
S_0 =0
sigma=1
mu=1
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 1.0
'''
ntraj = 10**4
tfinal = 4.0
tcheck = [0.5, 1.0, 4.0]
dt = 0.01
xv = 5.0 # limits
'''
n=int(tfinal/dt)
sqrtdt = np.sqrt(dt)
x=np.zeros(shape=[ntraj,n+1], dtype=float)
z=np.zeros(shape=[ntraj,n+1], dtype=float)
zrange=np.arange(start=-xv, stop=xv, step=dt)
# Calculate the number of the bins
binval = math.ceil(np.sqrt(ntraj))
# Nested for loop to create Drifted BM
for i in range(n):
for j in range(ntraj):
x[j,i+1]=x[j,i]+ sqrtdt*np.random.randn()
#Nested loop to create gBM
for j0 in range(ntraj):
for i0 in range(n+1):
z[j0,i0] = 0 + np.exp(x[j0,i0])
# Loop to plot the distribution of gBM tradjectories at different times
for i1 in range(n):
# Compute histogram at every tsample , sample at time t
t=(i1+1)*dt
if t in tcheck:
# Plot histogram on sample
v, b, patches = plt.hist(z[:,i1],bins=200,density=True,alpha=0.6,label=['t ={}'.format(t)], range=(0, 10))
popt, pcov = curve_fit(lognorm, b[:-1] + np.ediff1d(b), v, p0=(0.1, 300, 0.3))
# make colors match their original data but no transparency
plt.plot(b, lognorm(b, *popt), color=patches[0].get_facecolor()[:3])
print(f'tcheck: {t} for parameters: {popt}')