r bayesian jags

Dimension mismatch in subset expression in JAGS


I am very new to Bayesian analysis and was trying to practice with an example from TidyTuesday (https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-29/sports.csv).

I have set up my model, but when I try to run it the following error message appears:

Error in jags.model(textConnection(jags.script_with), data = dataset, : RUNTIME ERROR: Compilation error on line 5. Dimension mismatch in subset expression of y

Below my approach:

# Reshape the wide women/men columns into one long table: one row per
# (year, institution, sport, gender), with gender coded 1 = women, 0 = men.
Sports2 <- bind_rows(
  # women
  sports_clean %>%
    select(
      year, institution_name, sports,
      participants = partic_women,
      revenue = rev_women,
      expenditure = exp_women
    ) %>%
    mutate(gender = 1),
  # men
  sports_clean %>%
    select(
      year, institution_name, sports,
      participants = partic_men,
      revenue = rev_men,
      expenditure = exp_men
    ) %>%
    mutate(gender = 0)
) %>%
  na.omit() # keep only rows without missing values

An example row of the dataset:

Year institution_name sports participants revenue expenditure gender
2015 Alabama A&M Uni Soccer 21 410717 432648 1
#modeling with regression
# Ordinary least-squares reference fit; its coefficients are used below to
# centre the JAGS priors. (lm() itself involves no random numbers.)
set.seed(123)

model_with <- lm(expenditure ~ gender + participants, data = Sports2)

model_with

# Data passed to JAGS.
#   x: columns 4 and 7 of Sports2 (participants, gender)
#   y: column 6 of Sports2 (expenditure)
#   n: number of observations
# NOTE: y is deliberately left as posted. When Sports2 is a tibble,
# `Sports2[, 6]` stays a one-column tibble instead of becoming a vector, and
# this is the line behind the "Dimension mismatch in subset expression of y"
# error quoted above.
dataset <- list(x = Sports2[, c(4, 7)], y = Sports2[, 6], n = nrow(Sports2))


# Prior centres for the JAGS model: b_guess[1] = intercept,
# b_guess[2] = participants slope. The lm() fit also has a gender coefficient
# in position 2, so pick the two that the model uses by name rather than
# passing all three coefficients.
dataset$b_guess <- unname(
  model_with$coefficients[c("(Intercept)", "participants")]
)

# JAGS model: linear regression of y on x[, 1] (participants). The intercept
# and slope get normal priors centred on the lm() estimates (precision 0.1),
# tau is the residual precision, and sigma is the implied residual SD.
jags.script_with <-
  "
model{
  #likelihood
  for (i in 1:n){
    y[i] ~ dnorm(mu[i], tau)
    mu[i] = intercept + participants*x[i,1]
  }
  #prioirs
  intercept ~ dnorm(b_guess[1], 0.1)
  participants ~ dnorm(b_guess[2], 0.1)
  tau ~ dgamma(0.01,0.01)
  #transform
  sigma = 1/sqrt(tau)
}
"

# Compile the model with 4 chains and 2000 adaptation iterations.
mod_with <- jags.model(
  textConnection(jags.script_with),
  data = dataset,
  n.chains = 4,
  n.adapt = 2000
)

I can't figure out how to resolve the issue.

Looking for advice, please.

Thank you in advance!

Edit:

I have removed all the parts that are not needed for the model. This is now the corrected code; unfortunately, I cannot figure out why the error still persists.

Compiling model graph Resolving undeclared variables Deleting model

Error in jags.model(textConnection(jags.script_with), data = dataset, : RUNTIME ERROR: Compilation error on line 5. Dimension mismatch in subset expression of y

library(rjags)
library(tidyverse)
library(ggplot2)

# TidyTuesday 2022-03-29 collegiate sports data.
sports_raw <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-29/sports.csv")

# Reshape the wide women/men columns into one long table with a 0/1 gender
# flag (1 = women, 0 = men), then drop rows with missing values.
Sports2 <- bind_rows(
  # women
  sports_raw %>%
    select(
      year, institution_name, sports,
      participants = partic_women,
      revenue = rev_women,
      expenditure = exp_women
    ) %>%
    mutate(gender = 1),
  # men
  sports_raw %>%
    select(
      year, institution_name, sports,
      participants = partic_men,
      revenue = rev_men,
      expenditure = exp_men
    ) %>%
    mutate(gender = 0)
) %>%
  na.omit()

# Ordinary least-squares reference fit; its coefficients are used below to
# centre the JAGS priors. (lm() itself involves no random numbers.)
set.seed(123)

model_with <- lm(expenditure ~ gender + participants, data = Sports2)

model_with

# Data passed to JAGS.
#   x: columns 4 and 7 of Sports2 (participants, gender)
#   y: column 6 of Sports2 (expenditure)
#   n: number of observations
# NOTE: y is deliberately left as posted. Sports2 is a tibble, so
# `Sports2[, 6]` stays a one-column tibble instead of becoming a vector, and
# this is the line behind the "Dimension mismatch in subset expression of y"
# error quoted above.
dataset <- list(x = Sports2[, c(4, 7)], y = Sports2[, 6], n = nrow(Sports2))


# Prior centres for the JAGS model: b_guess[1] = intercept,
# b_guess[2] = participants slope. The lm() fit also has a gender coefficient
# in position 2, so pick the two that the model uses by name rather than
# passing all three coefficients.
dataset$b_guess <- unname(
  model_with$coefficients[c("(Intercept)", "participants")]
)

# JAGS model: linear regression of y on x[, 1] (participants). The intercept
# and slope get normal priors centred on the lm() estimates (precision 0.1),
# tau is the residual precision, and sigma is the implied residual SD.
jags.script_with <-
  "
model{
  #likelihood
  for (i in 1:n){
    y[i] ~ dnorm(mu[i], tau)
    mu[i] = intercept + participants*x[i,1]
  }
  #prioirs
  intercept ~ dnorm(b_guess[1], 0.1)
  participants ~ dnorm(b_guess[2], 0.1)
  tau ~ dgamma(0.01,0.01)
  #transform
  sigma = 1/sqrt(tau)
}
"

# Compile the model with 4 chains and 2000 adaptation iterations.
mod_with <- jags.model(
  textConnection(jags.script_with),
  data = dataset,
  n.chains = 4,
  n.adapt = 2000
)

Solution

  • The problem is that, in your original code, you are subsetting a tibble with `[`. In a regular data frame, selecting a single column this way returns a vector, but a tibble stays a tibble with one variable. The error is really saying that y is not the vector your model code expects: it is a one-column data frame, which JAGS treats differently from a vector (as a one-column matrix).

    library(rjags)
    library(tidyverse)
    library(ggplot2)
    
    # TidyTuesday 2022-03-29 collegiate sports data.
    sports_raw = read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-29/sports.csv')
    
    # Stack the women's and men's columns into one long table with a 0/1
    # gender flag, then drop rows with missing values.
    Sports2 =
      bind_rows(
        sports_raw %>%
          select(year, institution_name, sports, 
                 participants = partic_women,
                 revenue = rev_women,
                 expenditure = exp_women) %>%
          mutate(gender = 1), # women
        sports_raw %>%
          select(year, institution_name, sports, 
                 participants = partic_men,
                 revenue = rev_men,
                 expenditure = exp_men) %>%
          mutate(gender = 0) # men
      ) %>% na.omit()
    
    # modeling with regression
    set.seed(123)
    
    model_with =
      lm(expenditure ~ gender + participants, data = Sports2)
    
    model_with
    
    # dataset for jags model, exactly as in the question
    dataset = list(x = Sports2[, c(4, 7)], y = Sports2[, 6], n = nrow(Sports2))
    # `[` on a tibble keeps a one-column tibble, so y has two dimensions:
    dim(dataset$y)
    # [1] 130748      1
    

    There are two ways to fix this. You can make y a vector in the data:

    # Extract expenditure as a plain numeric vector so that y has no dim
    # attribute and y[i] in the model indexes it directly.
    dataset <- list(
      x = Sports2[, c(4, 7)],
      y = Sports2[["expenditure"]],
      n = nrow(Sports2)
    )
    dim(dataset$y)
    # NULL
    length(dataset$y)
    # [1] 130748
    

    Or, you could change the likelihood part of your model to acknowledge that y is a one-column matrix:

    # y arrives as an n-by-1 matrix, so index its single column explicitly
    y[i,1] ~ dnorm(mu[i], tau)
    

    The rest of the model could stay as it is. Do one or the other of these (though not both at the same time) and your model will run.