I've written the following code to compare the theoretical alpha = 0.05 with the empirical one obtained from the built-in t.test in RStudio:
# Monte Carlo estimate of the type-I error rate of the one-sample t-test.
# A finite normal population is generated once; k samples of size n are then
# drawn from it without replacement and each is tested against the true
# population mean, so every rejection is a false positive.
set.seed(1)
N <- 1000  # population size
n <- 20    # size of each sample
k <- 500   # number of samples / t-tests
poblacion <- rnorm(n = N, mean = 10, sd = 10)  # the simulated population
mu.pob <- mean(poblacion)
sd.pob <- sd(poblacion)
a_teo <- 0.05  # nominal significance level
p <- numeric(k)
for (i in seq_len(k)) {
  muestra <- sample(poblacion, size = n)
  p[i] <- t.test(muestra, mu = mu.pob)$p.value
}
# Empirical level: fraction of the k p-values that fall below a_teo.
a_emp <- sum(p < a_teo) / k
sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp)
And it works, printing both the theoretical and the empirical values. Now I want to make it more general, for different values of 'n', so I wrote this:
# Empirical type-I error rate of the one-sample t-test for several sample
# sizes, compared with the nominal level a_teo and plotted against n.
set.seed(1)
N <- 1000              # population size
n <- c(2, 10, 15, 20)  # sample sizes to compare (must be a vector to loop over)
k <- 500               # number of samples / t-tests per sample size
a_teo <- 0.05          # nominal significance level
z <- numeric(length(n))  # preallocated: one empirical alpha per sample size
for (i in seq_along(n)) {
  # A fresh population is generated for every sample size.
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  p <- numeric(k)
  for (j in seq_len(k)) {
    # The sample size is n[i] -- not length(n), which is the number of sizes.
    muestra <- poblacion[sample.int(N, n[i])]
    p[j] <- t.test(muestra, mu = mu.pob)$p.value
  }
  a_emp <- sum(p < a_teo) / k
  # append() returns a new vector and does not modify z in place, so the
  # result has to be stored explicitly.
  z[i] <- a_emp
  # Inside a loop nothing is auto-printed, hence the explicit print().
  print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
}
plot(n, z)
`sprintf` alone won't print anything inside a `for` loop; you need to wrap it in `print`.
> for (i in n) {  # presumably n is a vector of sample sizes set earlier in the session (not shown)
+ poblacion <- rnorm(N, 10, 10)  # new population of N normal draws (mean 10, sd 10) on every iteration
+ mu.pob <- mean(poblacion)  # population mean, used as the null value of the t-test
+ sd.pob <- sd(poblacion)  # computed but never used below
+ p <- vector(length=k)  # will hold the k p-values
+ for (j in 1:k) {
+ muestra <- poblacion[sample(1:N, length(n))]  # NOTE(review): sample size is length(n), the same on every iteration; i is never used -- probably meant i
+ p[j] <- t.test(muestra, mu=mu.pob)$p.value
+ }
+ a_teo <- 0.05
+ a_emp <- length(p[p<a_teo])/k  # share of the k p-values below a_teo
+ print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))  # explicit print(): values are not auto-printed inside a loop
+ }
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.056"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.050"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.064"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.048"
A more R-ish way to do this would be to wrap the logic in a function.
#' Compare the nominal and the empirical type-I error rate of the t-test
#'
#' Generates a normal population of size `N` (mean 10, sd 10), draws `k`
#' samples of size `n` from it without replacement and runs a one-sample
#' t-test of each sample against the population mean. The empirical level is
#' the fraction of the `k` p-values that fall below `alpha`.
#'
#' @param N Population size.
#' @param n Size of each sample.
#' @param k Number of samples (and t-tests).
#' @param alpha Nominal significance level.
#' @param verbose If `TRUE`, also report both levels via `message()`.
#' @return Numeric vector of length 2: `c(alpha, a_emp)`.
comp_fn <- function(N, n, k, alpha = .05, verbose = FALSE) {
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  # sample.int() indexes the population directly and, unlike sample(x, ...),
  # has no special case for a length-one x.
  p <- replicate(k, t.test(poblacion[sample.int(N, n)], mu = mu.pob)$p.value)
  a_emp <- sum(p < alpha) / k
  # Use the `alpha` argument throughout; the function must not depend on a
  # global `a_teo` happening to exist in the calling session.
  if (verbose) {
    message(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", alpha, a_emp))
  }
  c(alpha, a_emp)
}
>
> set.seed(1)
> comp_fn(1000, 20, 500)
[1] 0.050 0.058
> comp_fn(1000, 20, 500, verbose=TRUE)
alpha_teo = 0.050 <-> alpha_emp = 0.042
[1] 0.050 0.042
To loop over different arguments, `mapply` is your friend.
> set.seed(1)
> mapply(comp_fn, 1000, c(2, 10, 15, 20), 500)
[,1] [,2] [,3] [,4]
[1,] 0.050 0.050 0.050 0.050
[2,] 0.058 0.054 0.048 0.046