I'm trying to estimate an AR(1) process y
with a switching mean according to a latent state S
=0,1 that evolves as a markov process with fixed transition probabilities (as in here). In short, it takes the form:
y_t - mu_{0/1} = phi * (y_{t-1} - mu_{0/1})+ epsilon_t
where mu_0
would be used if state_t = 0 and mu_1 if state_t =1.
I'm using jax/numpyro with DiscreteHMCGibbs (although normal NUTS with latent state enumeration yields the same result) but I can't seem to have the sampler work properly. From all diagnostics I run, it seems that all hyperparameters are stuck at initialization value, and summary returns accordingly with all std==0. Here below I have a MWE that reproduces my problem. Is there an obvious mistake I am making in the implementation?
MWE:
import jax.numpy as jnp
import numpyro
import numpyro.distributions as dist
from numpyro.contrib.control_flow import scan
from numpyro.infer import MCMC, NUTS,DiscreteHMCGibbs
from jax import random, pure_callback
import jax
import numpy as np
def generate_synthetic_data(T=100, mu=[0, 5], phi=0.5, sigma=1.0, p=np.array([[0.95, 0.05], [0.1, 0.9]])):
states = np.zeros(T, dtype=np.int32)
y = np.zeros(T)
current_state = np.random.choice([0, 1], p=[0.5, 0.5])
states[0] = current_state
y[0] = np.random.normal(mu[current_state], sigma)
for t in range(1, T):
current_state = np.random.choice([0, 1], p=p[current_state,:])
states[t] = current_state
y[t] = np.random.normal(mu[current_state] + phi * (y[t-1] - mu[current_state]), sigma)
return y, states
def mean_switching_AR1_model(y):
T = len(y)
phi = numpyro.sample('phi', dist.Normal(0, 1))
sigma = numpyro.sample('sigma', dist.Exponential(1))
with numpyro.plate('state_plate', 2):
mu = numpyro.sample('mu', dist.Normal(0, 5))
p = numpyro.sample('p', dist.Dirichlet(jnp.ones(2)))
probs_init = numpyro.sample('probs_init', dist.Dirichlet(jnp.ones(2)))
s_0 = numpyro.sample('s_0', dist.Categorical(probs_init))
def transition_fn(carry, y_t):
prev_state = carry
state_probs = p[prev_state]
state = numpyro.sample('state', dist.Categorical(state_probs))
mu_state = mu[state]
y_mean = mu_state + phi * (y_t - mu_state)
y_next = numpyro.sample('y_next', dist.Normal(y_mean, sigma), obs=y_t)
return state, (state, y_next)
_ , (signal, y)=scan(transition_fn, s_0, y[:-1], length=T-1)
return (signal, y)
# Synthetic data generation
T = 1000
mu_true = [0, 3]
phi_true = 0.5
sigma_true = 0.25
transition_matrix_true = np.array([[0.95, 0.05], [0.1, 0.9]])
y, states_true = generate_synthetic_data(T, mu=mu_true, phi=phi_true, sigma=sigma_true, p=transition_matrix_true)
rng_key = random.PRNGKey(0)
nuts_kernel = NUTS(mean_switching_AR1_model)
gibbs_kernel = DiscreteHMCGibbs(nuts_kernel, modified=True)
# Run MCMC
mcmc = MCMC(gibbs_kernel, num_samples=1000, num_warmup=1000)
mcmc.run(rng_key, y=y)
mcmc.print_summary()
So it turns out there was indeed a pretty obvious mistake in the sense that I was not correctly carrying down y_{t-1}
as part of the state variables. The following corrected transition functions yield the intended result without problems.
def transition_fn(carry, y_curr):
prev_state, y_prev = carry
state_probs = p[prev_state]
state = numpyro.sample('state', dist.Categorical(state_probs))
mu_state = mu[state]
y_mean = mu_state + phi * (y_prev - mu_state)
y_curr = numpyro.sample('y_curr', dist.Normal(y_mean, sigma), obs=y_curr)
return (state, y_curr), (state, y_curr)
_, (signal, y) = scan(transition_fn, (s_0, y[0]), y[1:], length=T-1)