I have data on some dependent varaible y
that can be modeled as a function of covariates x1
and x2
. y
and x1
are observed at the "plot" level, and and x2
is observed at the "site" level. Plot is nested within site, hierarchically. Here are 100 observations of y
with associated covariate data.
#generate covariate data at plot and site scales.
x1 <- runif(100,0,1) #100 plot level observations of x1
x2 <- runif(10,10,20) #10 site level observations of x2
#generate site values - in this case characters A:J
site_1 <- LETTERS[sort(rep(seq(1,10, by = 1),10))]
site_2 <- LETTERS[sort(seq(1,10, by = 1))]
#put together site level data - 10 observations for 10 sites.
site_data <- data.frame(site_2,x2)
colnames(site_data) <- c('site','x2')
#put together plot level data - 100 observations across 10 sites
plot_data <- data.frame(site_1,x1)
colnames(plot_data) <- c('site','x1')
plot_data <- merge(plot_data,site_data, all.x=T) #merge in site level data.
#pick parameter values.
b1 <- 10
b2 <- 0.2
#y is a function of the plot level covariate x1 and the site level covariate x2.
plot_data$y <- b1*plot_data$x1 + b2*plot_data$x2 + rnorm(100)
#check that the model fits. it does.
summary(lm(y ~ x1 + x2, data = plot_data))
I can model y
as a function of x1
and x2
no problem in jags using the data frame plot_data
which basically replicates the site level observations of x2
10 times per site.
What I would really like to do however is fit the model hierarchically such that
y[i] ~ x1[i] + x2[j]
, where [i]
indicates the plot level observation and [j]
indexes the site. How can I modify the below JAGS code to do this?
#fit a JAGS model
jags.model = "
model{
# priors
b1 ~ dnorm(0, .001)
b2 ~ dnorm(0, .001)
tau <- pow(sigma, -2)
sigma ~ dunif(0, 100)
#normal model
for (i in 1:N){
y[i] ~ dnorm(y.hat[i], tau)
y.hat[i] <- b1*x1[i] + b2*x2[i]
}
} #end model
"
#setup jags data as a list
jd <- list(y=plot_data$y, x1=plot_data$x1, x2=plot_data$x2, N=length(plot_data$y))
library(runjags)
#run jags model
jags.out <- run.jags(jags.model,
data=jd,
adapt = 1000,
burnin = 1000,
sample = 2000,
n.chains=3,
monitor=c('b1', 'b2'))
summary(jags.out)
You just need an indexing vector at the response level that corresponds to the unique levels within site (which is easiest if it is coded as a factor). The following model is exactly equivalent to what you already have:
jags.model = "
model{
# priors
b1 ~ dnorm(0, .001)
b2 ~ dnorm(0, .001)
tau <- pow(sigma, -2)
sigma ~ dunif(0, 100)
# Response:
for (i in 1:N){
y[i] ~ dnorm(y.hat[i], tau)
y.hat[i] <- b1*x1[i] + site_effect[plot_site[i]]
}
# Effect of site:
for (s in 1:S){
site_effect[s] <- b2 * x2_site[site_site[s]]
}
}
"
# Ensure the site is coded as a factor with the same levels in both data frames:
plot_data$site <- factor(plot_data$site)
site_data$site <- factor(site_data$site, levels=levels(plot_data$site))
#setup jags data as a list
jd <- list(y=plot_data$y, x1=plot_data$x1, plot_site=plot_data$site,
site_site=site_data$site, x2_site=site_data$x2,
N=length(plot_data$y), S=nrow(site_data))
library(runjags)
#run jags model
jags.out <- run.jags(jags.model,
data=jd,
adapt = 1000,
burnin = 1000,
sample = 2000,
n.chains=3,
monitor=c('b1', 'b2'))
summary(jags.out)
The advantage of the hierarchical approach is that the effect of site can now be modified to e.g. incorporate a random effect or whatever.
Matt
Edit to add an example of a random effect
The following code adds a random effect of site along with the fixed effect corresponding to x2:
jags.model = "
model{
# priors
b1 ~ dnorm(0, .001)
b2 ~ dnorm(0, .001)
tau <- pow(sigma, -2)
sigma ~ dunif(0, 100)
tau.site <- pow(sigma.site, -2)
sigma.site ~ dunif(0, 100)
# Response:
for (i in 1:N){
y[i] ~ dnorm(y.hat[i], tau)
y.hat[i] <- b1*x1[i] + site_effect[plot_site[i]]
}
# Effect of site (fixed and random effects):
for (s in 1:S){
site_effect[s] <- b2 * x2_site[site_site[s]] + random[site_site[s]]
random[site_site[s]] ~ dnorm(0, tau.site)
}
}
"
# Ensure the site is coded as a factor with the same levels in both data frames:
plot_data$site <- factor(plot_data$site)
site_data$site <- factor(site_data$site, levels=levels(plot_data$site))
#setup jags data as a list
jd <- list(y=plot_data$y, x1=plot_data$x1, plot_site=plot_data$site,
site_site=site_data$site, x2_site=site_data$x2,
N=length(plot_data$y), S=nrow(site_data))
library(runjags)
#run jags model
jags.out <- run.jags(jags.model,
data=jd,
adapt = 1000,
burnin = 1000,
sample = 2000,
n.chains=3,
monitor=c('b1', 'b2', 'sigma.site', 'sigma'))
summary(jags.out)
This may or may not be a sensible model for your application - it is just an example. In this case, sigma.site is estimated as being quite small because it did not feature in the data simulation.