I am modeling a time series using a Kalman filter in the Python statsmodels package (Python 3.9 and statsmodels 0.13). The state space transition matrix looks like this:
The observation matrix looks like this (note that it depends on both the unobserved variable v as well as past values of the two observed variables p and n):
How might I define this kind of state space equation as a statsmodel MLEModel? I can capture most of this model through:
class AR1(sm.tsa.statespace.MLEModel):
start_params = [-0.5, 0.5, 0.0, 0.05, 0.05] # best guess at initial params
param_names = ['-psi', '1-phi', 'r_t', 'e_t', 'w_t']
def __init__(self, endog):
# Initialize the state space model
super(AR1, self).__init__(endog, k_states=2, k_posdef=1,
initialization='stationary')
# Setup the fixed components of the state space representation
self['design'] = [[1., 1.],
[1., 0]]
self['transition'] = [[1., 0],
[1., 0]]
self['selection', 0, 0] = 1.
# Describe how parameters enter the model
def update(self, params, transformed=True, **kwargs):
params = super(AR1, self).update(params, transformed, **kwargs)
self['design', 0, 1] = params[0] # param.0 is -psi
self['design', 1, 0] = params[1] # param.1 is (1-phi)
self['selection', 0, 0] = params[2] # param.2 is r_t
self['obs_cov', 0, 0] = params[3] # param.3 is e_t
self['obs_cov', 1, 1] = params[4] # param.4 is w_t
# Specify start parameters and parameter names
@property
def start_params(self):
return self.start_params
# Create and fit the model
mod = AR1(DepVars)
# Display results
res = mod.fit()
print(res.summary())
res.plot_diagnostics(figsize=(13,7))
However, I can't seem to figure out how to add the first term of the observation equation, which depends on both psi/phi as well as the prior observations themselves. Is there something I am missing? Perhaps by considering that term as an intercept somehow?
Any thoughts would be greatly appreciated. Thank you!!
Yes, you're exactly right, these can be included in the observation intercept term. Here's how I would write this model:
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace import tools
class SSM(sm.tsa.statespace.MLEModel):
# If you use _param_names and _start_params then the model
# will automatically pick them up
_param_names = ['phi', 'psi', 'sigma2_r', 'sigma2_e', 'sigma2_w']
_start_params = [0.5, 0.5, 0.1, 1, 1]
def __init__(self, endog):
# Extract lagged endog
X = sm.tsa.lagmat(endog, maxlag=1, trim='both', original='in')
endog = X[:, :2]
self.lagged_endog = X[:, 2:]
# Initialize the state space model
# Note: because your state process is nonstationary,
# you can't use stationary initialization
super().__init__(endog, k_states=2, k_posdef=1,
initialization='diffuse')
# Setup the fixed components of the state space representation
self['design'] = [[1., 1.],
[1., 0]]
self['transition'] = [[1., 0],
[1., 0]]
self['selection'] = [[1],
[0]]
# For the parameter estimation part, it is
# helpful to use parameter transformations to
# keep the parameters in the valid domain. Here
# I assumed that you wanted phi and psi to be
# between (-1, 1).
def transform_params(self, params):
params = params.copy()
for i in range(2):
params[i:i + 1] = tools.constrain_stationary_univariate(params[i:i + 1])
params[2:] = params[2:]**2
return params
def untransform_params(self, params):
params = params.copy()
for i in range(2):
params[i:i + 1] = tools.unconstrain_stationary_univariate(params[i:i + 1])
params[2:] = params[2:]**0.5
return params
# Describe how parameters enter the model
def update(self, params, **kwargs):
params = super().update(params, **kwargs)
# Here is where we're putting the lagged endog, multiplied
# by phi and psi into the intercept term
self['obs_intercept'] = self.lagged_endog.T * params[:2, None]
self['design', 0, 1] = -params[0]
self['design', 1, 0] = 1 - params[1]
self['state_cov', 0, 0] = params[2]
self['obs_cov'] = np.diag(params[3:5])
And we can simulate some data and then run the fitting routine to check if it seems to be working correctly:
rs = np.random.RandomState(12345)
# Specify some parameters to simulate data and
# check the model
nobs = 5000
params = np.r_[0.2, 0.8, 0.1, 1.5, 2.5]
# Simulate data
v = rs.normal(scale=params[2]**0.5, size=nobs + 1).cumsum()
e = rs.normal(scale=params[3]**0.5, size=nobs + 1)
w = rs.normal(scale=params[4]**0.5, size=nobs + 1)
p = np.zeros(nobs + 1)
n = np.zeros(nobs + 1)
for t in range(1, nobs):
p[t] = params[0] * p[t - 1] + v[t] - params[0] * v[t - 1] + e[t]
n[t] = params[1] * n[t - 1] + (1 - params[1]) * v[t] + w[t]
y = np.c_[p, n]
# Run MLE routine on the fitted data
mod = SSM(y)
res = mod.fit(disp=False)
# Check the estimated parameters
import pandas as pd
print(pd.DataFrame({
'True': params,
'Estimated': res.params
}, index=mod.param_names).round(2))
shows me:
True Estimated
phi 0.2 0.17
psi 0.8 0.81
sigma2_r 0.1 0.09
sigma2_e 1.5 1.49
sigma2_w 2.5 2.47