Tags: python-3.x, statsmodels, pymc, state-space

How to use PyMC (v4.0.1) with statsmodels to estimate state space models?


I am trying to use PyMC (v4.0.1) with statsmodels to estimate state-space models. I am following this example that uses PyMC3:

https://www.statsmodels.org/v0.12.0/examples/notebooks/generated/statespace_sarimax_pymc3.html

The example makes use of the pm.DensityDist() function, whose API has apparently changed. In addition, PyMC v4 uses Aesara instead of Theano, and I have no idea whether that matters here.

As a working example, here is my attempt to simulate and estimate an AR(1) process:

import numpy as np
import statsmodels.api as sm
import pymc as pm
import aesara.tensor as at
from scipy.signal import lfilter

# Generate artificial data
nobs = int(1e3)
true_phi = np.r_[0.5]
true_sigma = 0.5**0.5

np.random.seed(1234)
disturbances = np.random.normal(0, true_sigma, size=(nobs,))
endog = lfilter([1], np.r_[1, -true_phi], disturbances)

# Initialize model
mod = sm.tsa.statespace.SARIMAX(endog, order=(1, 0, 0))


# Helper classes copied from the example; I do not know how they work
class Loglike(at.Op):

    itypes = [at.dvector] # expects a vector of parameter values when called
    otypes = [at.dscalar] # outputs a single scalar value (the log likelihood)

    def __init__(self, model):
        self.model = model
        self.score = Score(self.model)

    def perform(self, node, inputs, outputs):
        theta, = inputs  # contains the vector of parameters
        llf = self.model.loglike(theta)
        outputs[0][0] = np.array(llf) # output the log-likelihood

    def grad(self, inputs, g):
        # the method that calculates the gradients - it actually returns the
        # vector-Jacobian product - g[0] is a vector of parameter values
        theta, = inputs  # our parameters
        out = [g[0] * self.score(theta)]
        return out


class Score(at.Op):
    itypes = [at.dvector]
    otypes = [at.dvector]

    def __init__(self, model):
        self.model = model

    def perform(self, node, inputs, outputs):
        theta, = inputs
        outputs[0][0] = self.model.score(theta)
        
        
loglike = Loglike(mod)

# Set sampling params
ndraws = 3000  # number of draws from the distribution
nburn = 600   # number of "burn-in points" (which will be discarded)

# Sample from posterior
with pm.Model():
    # Priors
    arL1 = pm.Uniform('ar.L1', -0.99, 0.99)
    sigma2 = pm.InverseGamma('sigma2', 2, 4)

    # convert variables to tensor vectors
    theta = at.as_tensor_variable([arL1, sigma2])

    # use a DensityDist (use a lambda function to "call" the Op)
    pm.DensityDist('likelihood', theta, logp = lambda v: loglike(v))

    # Draw samples
    trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)

The error is in the call to pm.sample().

---> 74     trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)

But the error message suggests that the issue has something to do with the likelihood function:

TypeError: <lambda>() takes 1 positional argument but 2 were given

I've tried a bunch of things, but I'm at a loss. I'd really appreciate any suggestions.


Solution

  • Issue fixed by replacing:

    pm.DensityDist('likelihood', theta, logp = lambda v: loglike(v))

    with

    pm.Potential('likelihood', loglike(theta))

    In PyMC v4, the logp function passed to DensityDist is called with the value as well as the distribution parameters, which is why the one-argument lambda fails with "takes 1 positional argument but 2 were given". pm.Potential sidesteps this by adding the custom log-likelihood term to the model directly.

    Here's the full working code:

    import numpy as np
    import statsmodels.api as sm
    import pymc as pm
    import aesara.tensor as at
    from scipy.signal import lfilter
    
    # Generate artificial data
    nobs = int(1e3)
    true_phi = np.r_[0.5]
    true_sigma = 0.5**0.5
    
    np.random.seed(1234)
    disturbances = np.random.normal(0, true_sigma, size=(nobs,))
    endog = lfilter([1], np.r_[1, -true_phi], disturbances)
    
    # Initialize model
    mod = sm.tsa.statespace.SARIMAX(endog, order=(1, 0, 0))
    
    
    # Aesara Ops wrapping the statsmodels log-likelihood (Loglike) and score vector (Score)
    class Loglike(at.Op):
    
        itypes = [at.dvector] # expects a vector of parameter values when called
        otypes = [at.dscalar] # outputs a single scalar value (the log likelihood)
    
        def __init__(self, model):
            self.model = model
            self.score = Score(self.model)
    
        def perform(self, node, inputs, outputs):
            theta, = inputs  # contains the vector of parameters
            llf = self.model.loglike(theta)
            outputs[0][0] = np.array(llf) # output the log-likelihood
    
        def grad(self, inputs, g):
            # the method that calculates the gradients - it actually returns the
            # vector-Jacobian product - g[0] is a vector of parameter values
            theta, = inputs  # our parameters
            out = [g[0] * self.score(theta)]
            return out
    
    
    class Score(at.Op):
        itypes = [at.dvector]
        otypes = [at.dvector]
    
        def __init__(self, model):
            self.model = model
    
        def perform(self, node, inputs, outputs):
            theta, = inputs
            outputs[0][0] = self.model.score(theta)
            
            
    loglike = Loglike(mod)
    
    # Set sampling params
    ndraws = 3000  # number of draws from the distribution
    nburn = 600   # number of "burn-in points" (which will be discarded)
    
    # Sample from posterior
    with pm.Model():
        # Priors
        arL1 = pm.Uniform('ar.L1', -0.99, 0.99)
        sigma2 = pm.InverseGamma('sigma2', 2, 4)
    
        # convert variables to tensor vectors
        theta = at.as_tensor_variable([arL1, sigma2])
    
        # use a Potential to add the custom log-likelihood (the Op) to the model
        pm.Potential('likelihood', loglike(theta))
    
        # Draw samples
        trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)
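
    To sanity-check the estimates against the true values used to generate the data (phi = 0.5, sigma2 = 0.5), the trace can be summarized with ArviZ, which PyMC installs as a dependency. A minimal sketch:

    import arviz as az

    # Posterior summary for the two sampled parameters
    print(az.summary(trace, var_names=['ar.L1', 'sigma2']))

    # Trace plots to eyeball mixing and convergence
    az.plot_trace(trace, var_names=['ar.L1', 'sigma2'])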