set.seed(2024)
#conditions
b1<-c(-0.3,0.3)
b2<-c(-0.3,0.3)
conditions <- expand.grid(b1 = b1, b2 = b2)
# some other conditions
sample_size <- 200
nsim<- 10 #number of simulations
#empty holders
xcoef<-numeric()
zcoef<-numeric()
int_pvalue<-numeric()
results <- data.frame(
condition_id = integer(),
xcoef = numeric(),
zcoef = numeric(),
t1er = numeric()
)
for (i in 1:nrow(conditions)){
xcoef <- numeric(nsim) # Reinitialize storage for x coefficients
zcoef <- numeric(nsim) # Reinitialize storage for z coefficients
int_pvalue <- numeric(nsim) # Reinitialize storage for p-values
for( j in 1:nsim){
#true data generation
sigma<-matrix(c(1,0,
0,1),2,2)
XZ<-as.data.frame(MASS::mvrnorm(n=sample_size,mu=c(0,0),Sigma=sigma))
colnames(XZ)<-c("x","z")
XZ$err<-rnorm(n=sample_size,mean=0,sd=1)
XZ$True_y <-
conditions$b1[i] * XZ$x +
conditions$b2[i] * XZ$z +
XZ$err
lmodel<-summary(lm(True_y ~ x * z, data=XZ))
xcoef[[j]]<-lmodel$coefficients[["x","Estimate"]]
zcoef[[j]]<-lmodel$coefficients[["z","Estimate"]]
# Calculate aggregated results
results$condition_id<-i
results$xcoef[i] <- mean(xcoef) # Mean x coefficient estimate
results$zcoef[i] <- mean(zcoef) # Mean z coefficient estimate
}
}
the error message pops up.
Further, I've noticed that
this part, it is not calculating aggrgated results, rather, it only put the last nsim (nested loop)'s results.
I wanted to aggregate the results across the number of simulations to get mean of each estimates and type 1 error rate.
Something like this?
Rewrite the loops as functions.
nsim
times with replicate
and extracts the relevant statistics;apply
loop on conditions
calls the latter function.The result below does not include the condition
numbers 1 to 4 but that is just a matter of cbind(condition = 1:4, result)
.
sim_one <- function(condition, n, mu, sigma) {
# true data generation
XZ <- MASS::mvrnorm(n = n, mu = mu, Sigma = sigma)
colnames(XZ) <- c("x", "z")
err <- rnorm(n = sample_size, mean = 0, sd = 1)
True_y <- c(condition %*% t(XZ)) + err
XZ <- cbind.data.frame(XZ, True_y)
# fit one regression
lm(True_y ~ x*z, data = XZ)
}
sim_many <- function(condition, R, n, mu, sigma) {
# run nsim regressions
fit_list <- replicate(R, sim_one(condition, n, mu, sigma), simplify = FALSE)
# extract the coefficients and compute their mean values
mean_xz <- sapply(fit_list, \(fit) coef(fit)[c("x", "z")]) |> rowMeans()
mean_xz <- unname(mean_xz)
# get the F statistics mean value
ff <- sapply(fit_list, \(fit) summary(fit)[["fstatistic"]][1L]) |> mean()
# the degrees of freedom are always the same, extract them
# from any of the fits, in this case from the 1st
df12 <- summary(fit_list[[1L]])[["fstatistic"]][2:3]
# p-value of the average F stat
t1er <- pf(ff, df1 = df12[1L], df2 = df12[2L], lower.tail = FALSE)
# return a named vector
c(xcoef = mean_xz[1L], zcoef = mean_xz[2L], t1er = t1er)
}
# conditions
b1 <- c(-0.3, 0.3)
b2 <- c(-0.3, 0.3)
conditions <- expand.grid(b1 = b1, b2 = b2) |> as.matrix()
ncond <- nrow(conditions)
# some other conditions
sample_size <- 200
nsim <- 10 # number of simulations
mu <- c(0, 0)
sigma <- matrix(c(1, 0, 0, 1), 2, 2)
set.seed(2024)
t(apply(conditions, 1L, sim_many, R = nsim, n = sample_size, mu = mu, sigma = sigma))
#> xcoef zcoef t1er
#> [1,] -0.3316541 -0.2840518 4.240253e-08
#> [2,] 0.2629164 -0.2967959 7.393880e-07
#> [3,] -0.3222810 0.2615112 8.463187e-08
#> [4,] 0.2823582 0.3164789 2.251141e-08
Created on 2024-12-01 with reprex v2.1.1