I'm trying to include a black-box likelihood function in a pymc3 model. The function just takes a vector of parameter values and returns the log-likelihood (all of the data is already baked into the function).
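For concreteness, a toy stand-in with the same interface (my real function wraps a simulation, so this Gaussian version is purely illustrative and not my actual code):

import numpy as np

# illustrative stand-in only: vector of parameter values in, scalar log-likelihood out,
# with the data baked into the function, just like my real simulation-based one
_fake_data = np.random.default_rng(0).normal(2.5, 1.0, size=50)

def loglikelihood(theta):
    k = theta[0]
    return -0.5 * np.sum((_fake_data - k) ** 2)  # unit-variance Gaussian log-likelihood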
So far I've been following this guide and have modified the code as follows to accommodate the fact that my model has only one parameter, k.
import numpy as np
import pymc3 as pm
import theano.tensor as tt

from elsewhere import loglikelihood

# define a theano Op for our likelihood function
class LogLike(tt.Op):
    """
    Specify what type of object will be passed and returned to the Op when it is
    called. In our case we will be passing it a vector of values (the parameters
    that define our model) and returning a single "scalar" value (the
    log-likelihood).
    """

    itypes = [tt.dvector]  # expects a vector of parameter values when called
    otypes = [tt.dscalar]  # outputs a single scalar value (the log-likelihood)

    def __init__(self, loglike):
        """
        Initialise the Op with the things that our log-likelihood function
        requires.

        Parameters
        ----------
        loglike:
            The log-likelihood (or whatever) function we've defined
        """
        # add inputs as class attributes
        self.loglikelihood = loglike

    def perform(self, node, inputs, outputs):
        # the method that is used when calling the Op
        (theta,) = inputs  # this will contain my variables
        # call the log-likelihood function
        logl = self.loglikelihood(theta)
        outputs[0][0] = np.array(logl)  # output the log-likelihood

# hold the sigma parameters fixed; nul_sigmas is defined earlier in my script
fixed_sigma_loglikelihood = lambda x: loglikelihood(np.concatenate((x, nul_sigmas)))
logl = LogLike(fixed_sigma_loglikelihood)
# Create and sample the 1-parameter model
ndraws = 100
nburn = 10
k_true = 2.5

the_model = pm.Model()
with the_model:
    k = pm.Uniform("k", lower=0, upper=15)
    # Convert the prior to a theano tensor variable
    theta = tt.as_tensor_variable([k])
    # Create a density distribution for our numerical likelihood
    pm.DensityDist("likelihood", lambda y: logl(y), observed={"y": theta})
    trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True)
Printing the output as it runs, I can see that it samples correctly. However, once sampling finishes it fails with the following error:
Only 100 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS failed. Falling back to elementwise auto-assignment.
Multiprocess sampling (4 chains in 4 jobs)
Slice: [k]
Could not pickle model, sampling singlethreaded.
Sequential sampling (4 chains in 1 job)
Slice: [k]
Sampling 4 chains for 10 tune and 100 draw iterations (40 + 400 draws total) took 41 seconds.
Traceback (most recent call last):
File "~/code/integrating-data/pymc_model.py", line 54, in <module>
trace = pm.sample(ndraws, tune = nburn, discard_tuned_samples=True)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/pymc3/sampling.py", line 639, in sample
idata = arviz.from_pymc3(trace, **ikwargs)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/arviz/data/io_pymc3_3x.py", line 580, in from_pymc3
return PyMC3Converter(
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/arviz/data/io_pymc3_3x.py", line 181, in __init__
self.observations, self.multi_observations = self.find_observations()
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/arviz/data/io_pymc3_3x.py", line 194, in find_observations
multi_observations[key] = val.eval() if hasattr(val, "eval") else val
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/graph/basic.py", line 554, in eval
self._fn_cache[inputs] = theano.function(inputs, self)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/compile/function/__init__.py", line 337, in function
fn = pfunc(
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/compile/function/pfunc.py", line 524, in pfunc
return orig_function(
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/compile/function/types.py", line 1970, in orig_function
m = Maker(
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/compile/function/types.py", line 1584, in __init__
fgraph, additional_outputs = std_fgraph(inputs, outputs, accept_inplace)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/compile/function/types.py", line 188, in std_fgraph
fgraph = FunctionGraph(orig_inputs, orig_outputs, update_mapping=update_mapping)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/graph/fg.py", line 162, in __init__
self.import_var(output, reason="init")
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/graph/fg.py", line 330, in import_var
self.import_node(var.owner, reason=reason)
File "/Users/llamagod/miniconda3/envs/mcmc/lib/python3.9/site-packages/theano/graph/fg.py", line 383, in import_node
raise MissingInputError(error_msg, variable=var)
theano.graph.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(k_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.
I'm new to pymc3, so this has me rather lost. Unfortunately I do need some form of black-box approach, because evaluating the likelihood involves running a simulation.
As per the comments I checked out this thread and found that pm.Potential really is the cleanest way to get a black-box likelihood into the model. The traceback also makes sense in hindsight: when converting the trace, arviz tries to evaluate the DensityDist's "observed" values, and since theta is built from the free variable k it cannot be evaluated on its own, hence the MissingInputError. A Potential just adds an arbitrary term to the model's log-probability and carries no observed values, so the conversion succeeds. Modifying the code above as follows did the trick:
# Create and sample the 1-parameter model
ndraws = 100
nburn = 10
k_true = 2.5

the_model = pm.Model()
with the_model:
    k = pm.Uniform("k", lower=0, upper=15)
    # Convert the prior to a theano tensor variable
    theta = tt.as_tensor_variable([k])
    # Add an arbitrary potential term to the posterior
    pm.Potential("likelihood", logl(theta))
    trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True)
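With the Potential version nothing is registered as observed, so the arviz conversion at the end of pm.sample goes through and the trace can be inspected as usual, e.g. (standard pymc3 3.x calls, nothing specific to my model):

# posterior summary for k (k_true = 2.5 in my setup)
with the_model:
    print(pm.summary(trace))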