roptimizationrandommcmc

MCMC for estimating negative binomial distribution


I want to estimate parameters of negative binomial distribution using MCMC Metropolis-Hastings algorithm. In other words, I have sample:

set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)

and I want to write algorithm that will estimate parameter of size and parameter of prob.

My work so far

I defined prior distribution of size as Poisson:

prior_r <- function(r) {
  return(dpois(r, lambda = 2, log = T))
}

And prior distribution of prob as uniform on [0, 1]:

prior_prob <- function(prob) {
  return(dunif(prob, min = 0, max = 1, log = T))
}

Moreover for simplicity I defined loglikelihood and joint probability functions:

loglikelihood <- function(data, r, prob) {
  loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = T))
  return(loglikelihoodValue)
}


joint <- function(r, prob) {
  data <- y
  return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}

Finally, the whole algorithm:

run_mcmc <- function(startvalue, iterations) {
  
  chain <- array(dim = c(iterations + 1, 2))

  chain[1, ] <- startvalue

  for (i in 1:iterations) {

    proposal_r <- rpois(1, lambda = chain[i, 1])

    proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
    
    quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
    
    if (runif(1, 0, 1) < min(1, exp(quotient))) chain[i + 1, ] <- c(proposal_r, proposal_prob)
    
    else chain[i + 1, ] <- chain[i, ]

  }

  return(chain)
}

The problem

Problem that I'm having is that when I run it with starting values even very close to correct ones:

iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)

I'll obtain posterior distribution which is obviously wrong. For example

> colMeans(res)
[1] 11.963018  0.994533

As you can see, size is located very close to point 12, and probability is located in point 1.

Do you know what's the cause of those phenomeons?


Solution

  • Change dnorm in loglikelihood to dnbinom and fix the proposal for prob so it doesn't go outside (0,1):

    set.seed(42)
    y <- rnbinom(20, size = 3, prob = 0.2)
    
    prior_r <- function(r) {
      return(dpois(r, lambda = 2, log = T))
    }
    
    prior_prob <- function(prob) {
      return(dunif(prob, min = 0, max = 1, log = TRUE))
    }
    
    loglikelihood <- function(data, r, prob) {
      loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
      return(loglikelihoodValue)
    }
    
    joint <- function(r, prob) {
      return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
    }
    
    run_mcmc <- function(startvalue, iterations) {
      
      chain <- array(dim = c(iterations + 1, 2))
      
      chain[1, ] <- startvalue
      
      for (i in 1:iterations) {
        proposal_r <- rpois(1, lambda = chain[i, 1])
        proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i,2]), max = min(0.2, 1 - chain[i,2]))
        quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
        
        if (runif(1, 0, 1) < min(1, exp(quotient))) {
          chain[i + 1, ] <- c(proposal_r, proposal_prob)
        } else {
          chain[i + 1, ] <- chain[i, ]
        }
      }
      
      return(chain)
    }
    
    iterations <- 2000
    startvalue <- c(4, 0.25)
    res <- run_mcmc(startvalue, iterations)
    colMeans(res)
    #> [1] 3.1009495 0.1988177