rstatisticsbeta

Computing empirical Beta in a given distribution


I have to calculate an empirical β and compare it with the theoretical one by measuring the difference. This is the model to develop the new code:

if (!require("pwr")) install.packages("pwr")

# parameters
N=1000
n = 25
k = 500
a_teo = 0.05

#Here we create the universe 
poblacion <- rnorm(N, 10, 10)
m.pob <- mean(poblacion)
sd.pob <- sd(poblacion)

#Data simulation and store of p.values
p <- vector(length=k)
for (i in 1:k){
 muestra <- poblacion[sample(1:N, n)]
 p[i] <- t.test(muestra, mu=15)$p.value
}

#Empiric beta
beta_emp <- length(p[p>a_teo])/k

#Theoretical beta
d=(10 - 15) / sd.pob
beta_teo <- 1 - pwr.t.test(n,
                           d=d,
                           sig.level=a_teo,
                            type="one.sample")$power
#Print theoretical vs empirical bet
sprintf("beta_teo = %.3f -- beta_emp = %.3f -- Diff = %.3f", beta_teo, beta_emp, beta_teo-beta_emp)

Using the previous as reference, I have to run a simulation of a distribution that's NOT 'normal', but instead just have values in the range (2, 3, 4, 5) and measure the difference between Empirical and Theoretical beta. This is my code:

if (!require("pwr")) install.packages("pwr")

N=1000
n = 25
k = 500
a_teo = .05

poblacion <- sample(2:5, N, replace = T)
m.pob <- mean(poblacion)
sd.pob <- sd(poblacion)

p <- vector(length=k)
for (i in seq_along(p)){
  muestra <- sample(2:5, N, replace = T)
  p[i] <- t.test(muestra, mu=m.pob)$p.value
}

#Empiric beta
beta_emp <- length(p[p>a_teo])/k

#Theoretical beta
d=(10 - m.pob) / sd.pob
beta_teo <- 1 - pwr.t.test(n,
                           d=d,
                           sig.level=a_teo,
                           type="one.sample")$power
sprintf("beta_teo = %.3f -- beta_emp = %.3f -- Diff = %.3f", beta_teo, beta_emp, beta_teo-beta_emp)


I get 'zero' as the value for the theoretical beta. Can someone tell me where is the problem? The '10' value is the mean from the example, so it should probably be changed for this one.


Solution

  • There are several misunderstood points.

    library(pwr)
    
    n = 20   # sample size
    k = 5000 # number of simulated samples
    a_teo = .05
    
    m.pob <- 2 # population mean under H1
    mu <- 1    # actual population mean
    stdev <- 1.5
    
    p <- numeric(k)
    for (i in seq_along(p)){
      muestra <- rnorm(n, mu, stdev)
      p[i] <- t.test(muestra, mu = m.pob)$p.value
    }
    
    #Empiric beta
    beta_emp <- length(p[p>a_teo])/k
    
    #Theoretical beta
    d <- (mu - m.pob) / stdev
    beta_teo <- 1 - pwr.t.test(n,
                               d = d,
                               sig.level = a_teo,
                               type = "one.sample")$power
    sprintf("beta_teo = %.3f -- beta_emp = %.3f -- Diff = %.3f", beta_teo, beta_emp, beta_teo-beta_emp)