I am trying to run a bootstrap simulation of a bioequivalence study in R with 3 pharmacokinetic parameters (AUC0-infinity [AUCIFO
], AUClast [AUCLST
], Cmax [CMAX
]). I want to sample with replacement from the actual study dataset to increase the sample size from 25 to 100, perform the BE calculation (using the bw2x2
function from the BE
package), and then repeat that 1000 to 10000 times (depending on how long it takes to run) and generate a geometric mean ratio (GMR) for each parameter with 90%CI.
Here is a reprex dataset that I generated to represent the study date (sample size of 25):
require(dplyr)
require(BE)
set.seed(123)
aucifo_test <- rnorm(25, 2500, 25)
aucifo_ref <- rnorm(25, 2600, 25)
auclst_test <- rnorm(25, 2400, 25)
auclst_ref <- rnorm(25, 2400, 25)
cmax_test <- rnorm(25, 50, 5)
cmax_ref <- rnorm(25, 55, 5)
SUBJ = c(1:25)
GRP = sample(c(1,2), 25, replace = TRUE)
be_df <- rbind(data.frame(SUBJ = SUBJ, GRP = GRP, TRT = "R", AUCIFO = aucifo_ref, AUCLST = auclst_ref, CMAX = cmax_ref),
data.frame(SUBJ = SUBJ, GRP = GRP, TRT = "T",AUCIFO = aucifo_test, AUCLST = auclst_test, CMAX = cmax_test)) %>%
mutate(PRD = case_when(
GRP == 1 & TRT == "R" ~ 1,
GRP == 1 & TRT == "T" ~ 2,
GRP == 2 & TRT == "R" ~ 2,
GRP == 2 & TRT == "T" ~ 1
)) %>%
mutate(GRP = case_when(
GRP == 1 ~ "RT",
GRP == 2 ~ "TR"
)) %>%
select(GRP, PRD, SUBJ, TRT, AUCIFO, AUCLST, CMAX)
be_results <- be2x2(be_df, c("AUCIFO", "AUCLST", "CMAX"))
The output of be2x2
is a list, which can be subsetted by the column name to obtain the GMR. Here is the output of the BE results:
print(be_results)
$AUCIFO
$AUCIFO$`Analysis of Variance (log scale)`
Sum Sq Df Mean Sq F value Pr(>F)
SUBJECT 0.0024154 24 0.0001006 1.8132 0.07915 .
GROUP 0.0001999 1 0.0001999 2.0749 0.16322
SUBJECT(GROUP) 0.0022155 23 0.0000963 1.7355 0.09686 .
PERIOD 0.0003246 1 0.0003246 5.8476 0.02392 *
DRUG 0.0203062 1 0.0203062 365.8577 1.332e-15 ***
ERROR 0.0012766 23 0.0000555
TOTAL 0.0245614 49
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$AUCIFO$`Between and Within Subject Variability`
Between Subject Within Subject
Variance Estimate 2.041146e-05 5.55029e-05
Coefficient of Variation, CV(%) 4.517928e-01 7.45013e-01
$AUCIFO$`Least Square Means (geometric mean)`
Reference Drug Test Drug
Geometric Means 2601.982 2499.114
$AUCIFO$`90% Confidence Interval of Geometric Mean Ratio (T/R)`
Lower Limit Point Estimate Upper Limit
90% CI for Ratio 0.9570003 0.9604654 0.9639432
$AUCIFO$`Sample Size`
True Ratio=1 True Ratio=Point Estimate
80% Power Sample Size 2 2
$AUCLST
$AUCLST$`Analysis of Variance (log scale)`
Sum Sq Df Mean Sq F value Pr(>F)
SUBJECT 0.0019802 24 0.00008251 0.9748 0.52554
GROUP 0.0000025 1 0.00000248 0.0288 0.86668
SUBJECT(GROUP) 0.0019778 23 0.00008599 1.0159 0.48504
PERIOD 0.0003203 1 0.00032025 3.7837 0.06408 .
DRUG 0.0001160 1 0.00011600 1.3705 0.25371
ERROR 0.0019467 23 0.00008464
TOTAL 0.0043485 49
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$AUCLST$`Between and Within Subject Variability`
Between Subject Within Subject
Variance Estimate 6.746561e-07 8.464061e-05
Coefficient of Variation, CV(%) 8.213747e-02 9.200228e-01
$AUCLST$`Least Square Means (geometric mean)`
Reference Drug Test Drug
Geometric Means 2407.201 2399.873
$AUCLST$`90% Confidence Interval of Geometric Mean Ratio (T/R)`
Lower Limit Point Estimate Upper Limit
90% CI for Ratio 0.992516 0.9969559 1.001416
$AUCLST$`Sample Size`
True Ratio=1 True Ratio=Point Estimate
80% Power Sample Size 2 2
$CMAX
$CMAX$`Analysis of Variance (log scale)`
Sum Sq Df Mean Sq F value Pr(>F)
SUBJECT 0.17640 24 0.007350 0.6666 0.834823
GROUP 0.00032 1 0.000321 0.0419 0.839670
SUBJECT(GROUP) 0.17608 23 0.007656 0.6943 0.805917
PERIOD 0.00308 1 0.003078 0.2792 0.602300
DRUG 0.12204 1 0.122036 11.0680 0.002934 **
ERROR 0.25360 23 0.011026
TOTAL 0.55687 49
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
$CMAX$`Between and Within Subject Variability`
Between Subject Within Subject
Variance Estimate 0 0.01102599
Coefficient of Variation, CV(%) 0 10.52948160
$CMAX$`Least Square Means (geometric mean)`
Reference Drug Test Drug
Geometric Means 53.51791 48.47896
$CMAX$`90% Confidence Interval of Geometric Mean Ratio (T/R)`
Lower Limit Point Estimate Upper Limit
90% CI for Ratio 0.8608553 0.9058456 0.9531872
$CMAX$`Sample Size`
True Ratio=1 True Ratio=Point Estimate
80% Power Sample Size 3 6
You can subset the results such that you isolate the GMR point estimate. For example if I want the point estimate for AUCIFO, I can enter be_results$AUCIFO$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2]
.
I am trying to use the bootstrap
package to run the bootstrapping itself. So far I have the following theta
function:
theta <- function(x) {
temp <- data.frame(SAMP = sample(c(1:25), 100, replace=TRUE), ID = c(1:100)) #get random sample of 100 subject IDs
ref <- temp %>% left_join(., x %>% filter(TRT == "R"), by = c("SAMP" = "SUBJ")) %>% select(-SAMP) #join reference samples subject IDs with study data and replace ID with new value
test <- temp %>% left_join(., x %>% filter(TRT == "T"), by = c("SAMP" = "SUBJ")) %>% select(-SAMP) #join test samples subject IDs with study data and replace ID with new value
be_df <- rbind(ref,test) %>% rename(SUBJ=ID) #join test and reference into single dataset
be_results <- be2x2(be_df, c("AUCIFO", "AUCLST", "CMAX"))
AUCIFO_GMR <- c(be_results$AUCIFO$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
AUCLST_GMR <- c(be_results$AUCLST$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
CMAX_GMR <- c(be_results$CMAX$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
}
However, when I try to run the following (small nboot
for troubleshooting): bootstrap(be_df, 10, theta)
, I get the following error message: Error in UseMethod("filter") : no applicable method for 'filter' applied to an object of class "list"
I am thinking this has something to do with the fact that I am trying to use a dataframe in the bootstrap
function. Any help is appreciated!
Solution
Thanks to @jay.sf for the helpful answer! Made some additional tweaks to the code to output a dataframe, which are below:
theta <- function() {
temp <- data.frame(SAMP = sample(c(1:25), 100, replace=TRUE), ID = c(1:100)) #get random sample of 100 subject IDs
ref <- temp %>% left_join(., x101_boot_be %>% filter(TRT == "R"), by = c("SAMP" = "SUBJ")) %>% select(-SAMP) #join reference samples subject IDs with study data and replace ID with new value
test <- temp %>% left_join(., x101_boot_be %>% filter(TRT == "T"), by = c("SAMP" = "SUBJ")) %>% select(-SAMP) #join test samples subject IDs with study data and replace ID with new value
be_df <- rbind(ref,test) %>% rename(SUBJ=ID) #join test and reference into single dataset
be_results <- be2x2_quiet(be_df, c("AUCIFO", "AUCLST", "CMAX"))
AUCIFO_GMR <- c(be_results$AUCIFO$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
AUCLST_GMR <- c(be_results$AUCLST$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
CMAX_GMR <- c(be_results$CMAX$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
output = list(c(AUCIFO_GMR=AUCIFO_GMR, AUCLST_GMR=AUCLST_GMR, CMAX_GMR=CMAX_GMR))
}
boostrap_results <- replicate(1000L, theta())
boostrap_results_df <- as.data.frame(do.call(rbind, boostrap_results))
To replicate
your bootstrap function you actually don't need the boot
package; just re-write it a little.
theta1 <- function() {
temp <- data.frame(SAMP= sample(c(1:25), 100, replace=TRUE), ID= c(1:100)) #get random sample of 100 subject IDs
ref <- temp %>% left_join(., be_df %>% filter(TRT== "R"), by= c("SAMP"= "SUBJ")) %>% select(-SAMP) #join reference samples subject IDs with study data and replace ID with new value
test <- temp %>% left_join(., be_df %>% filter(TRT== "T"), by= c("SAMP"= "SUBJ")) %>% select(-SAMP) #join test samples subject IDs with study data and replace ID with new value
be_df <- rbind(ref, test) %>% rename(SUBJ=ID) #join test and reference into single dataset
be_results <- be2x2_quiet(be_df, c("AUCIFO", "AUCLST", "CMAX"))
AUCIFO_GMR <- c(be_results$AUCIFO$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
AUCLST_GMR <- c(be_results$AUCLST$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
CMAX_GMR <- c(be_results$CMAX$`90% Confidence Interval of Geometric Mean Ratio (T/R)`[2])
c(AUCIFO_GMR, AUCLST_GMR, CMAX_GMR)
}
set.seed(42)
res <- replicate(299L, theta1())
matrixStats::rowQuantiles(res, probs=c(.025, .975))
# 2.5% 97.5%
# [1,] 0.9598403 0.9632898
# [2,] 0.9984093 1.0012312
# [3,] 0.8945873 0.9559859
You could also use the usual apply(res, 1, quantile, probs=c(.025, .975))
but matrixStats::rowQuantiles
is > 50% faster.
And here is the referenced quiet version of b2x2
:
be2x2_quiet <- function (Data, Columns = c("AUClast", "Cmax", "Tmax"), rtfName = "", plot=FALSE) {
if ("data.frame" %in% class(Data)) {
bedata = Data
}
else if ("character" %in% class(Data)) {
bedata = read.csv(Data)
}
else {
stop("Data should be data.frame or file name!")
}
bedata = bedata[order(bedata$GRP, bedata$PRD, bedata$SUBJ),
]
if (!assert(bedata)) {
cat("\n Subject count should be balanced!\n")
return(NULL)
}
nCol = length(Columns)
if (nCol == 0)
stop("Input Error. Please, check the arguments!")
if (rtfName != "") {
rtf = RTF(rtfName)
addHeader(rtf, title = "Bioequivalence Test Result")
addNewLine(rtf)
addHeader(rtf, "Table of Contents")
addTOC(rtf)
}
Result = vector()
for (i in 1:nCol) {
if (plot) { ## defaults to FALSE #######################
plot2x2(bedata, Columns[i])
}
if (toupper(Columns[i]) != "TMAX") {
cResult = test2x2(bedata, Columns[i])
}
else {
cResult = hodges(bedata, Columns[i])
}
if (rtfName != "") {
addPageBreak(rtf)
addHeader(rtf, title = Columns[i], TOC.level = 1)
LineResult = capture.output(print(cResult))
for (j in 1:length(LineResult)) addParagraph(rtf,
LineResult[j])
addPageBreak(rtf)
addPlot(rtf, plot.fun = plot2x2a, width = 6.5, height = 6.5,
res = 300, bedata = bedata, Var = Columns[i])
addPageBreak(rtf)
addPlot(rtf, plot.fun = plot2x2b, width = 6.5, height = 6.5,
res = 300, bedata = bedata, Var = Columns[i])
}
Result = c(Result, list(cResult))
}
if (rtfName != "") {
addPageBreak(rtf)
addSessionInfo(rtf)
done(rtf)
}
names(Result) = Columns
return(Result)
}