I've been stuck on this issue for a while. This is a simplified version of a larger code.
lc(df, months=c('May', 'June', 'August'))
This should give me the same plot 3 times. It gives me 3 different plots, 2 of them incorrect. I can't even wrap my head around what is happening. When I simply print(p)
inside the loop, it works perfectly. Why doesn't this work? Thanks for any advice.
library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
data <- data%>% filter(month %in% c(months))
myplots <- list()
for (i in seq_along(months)){
a <- data[data$month %in% months[i],]
print(a)
lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
weights = total,
data =a)
LC50 <- lc$dose[1] #strip lc50
LC90 <- lc$dose[2] #strip lc90
p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
geom_point(size=4)+
geom_smooth(method="glm",
method.args=list(family=binomial(link="probit")),
aes(weight=total),colour="blue",se=TRUE, level=0.95) +
geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1)
myplots[[i]] <- p
print(myplots[1])
}
}
UPDATE:
Here is the source code for ecotox:LC_probit
.
LC_probit <- function(formula, data, p = NULL,
weights = NULL,
subset = NULL, log_base = NULL,
log_x = TRUE,
het_sig = NULL,
conf_level = NULL,
conf_type = NULL,
long_output = TRUE) {
model <- do.call("glm", list(formula = formula,
family = binomial(link = "probit"),
data = data,
weights = substitute(weights),
subset = substitute(subset)))
# error message for missing weights argument in function call
if(missing(weights)) {
warning ("Are you using cbind(response, non-response) method as your y variable, if so do not weight the model. If you are using (response / total) method, model needs the total of test organisms per dose to weight the model properly",
call. = FALSE)
}
if (is.null(p)) {
p <- seq(1, 99, 1)
warning("`p`argument has to be supplied otherwise LC values for 1-99 will be displayed", call. = FALSE)
}
chi_square <- sum(residuals.glm(model, type = "pearson") ^ 2)
df <- df.residual(model)
pgof <- pchisq(chi_square, df, lower.tail = FALSE)
summary <- summary(model)
b0 <- summary$coefficients[1]
b1 <- summary$coefficients[2]
intercept_se <- summary$coefficients[3]
intercept_sig <- summary$coefficients[7]
slope_se <- summary$coefficients[4]
slope_sig <- summary$coefficients[8]
z_value <- summary$coefficients[6]
n <- df + 2
est <- qnorm(p / 100)
m <- (est - b0) / b1
if (is.null(het_sig)) {
het_sig <- 0.150
}
if (pgof < het_sig) {
het <- chi_square / df
} else {
het <- 1
}
if (is.null(conf_type)) {
conf_type <- c("fl")
} else {
conf_type <- c("dm")
}
if (conf_type == "fl") {
if (pgof < het_sig) {
vcova <- vcov(model) * het
} else {
vcova <- vcov(model)
}
var_b0 <- vcova[1, 1]
var_b1 <- vcova[2, 2]
cov_b0_b1 <- vcova[1, 2]
if (is.null(conf_level)) {
conf_level <- 0.95
}
t <- (1 - conf_level)
t_2 <- (t / 2)
if (pgof < het_sig) {
tdis <- -qt(t_2, df = df)
} else {
tdis <- -qnorm(t_2)
}
g <- (tdis ^ 2 * var_b1) / (b1 ^ 2)
cl_part_1 <- (g / (1 - g)) * (m + (cov_b0_b1 / var_b1))
cl_part_2 <- (var_b0 + (2 * cov_b0_b1 * m) + (m ^ 2 * var_b1) -
(g * (var_b0 - cov_b0_b1 ^ 2 / var_b1)))
cl_part_3 <- (tdis / ((1 - g) * abs(b1))) * sqrt(cl_part_2)
LCL <- (m + (cl_part_1 - cl_part_3))
UCL <- (m + (cl_part_1 + cl_part_3))
}
cf <- -cbind(1, m) / b1
se_1 <- ((cf %*% vcov(model)) * cf) %*% c(1, 1)
se_2 <- as.numeric(sqrt(se_1))
var_m <- (1 / (m ^ 2)) * (var_b0 + 2 * m * cov_b0_b1 +
m ^ 2 * var_b1)
if (log_x == TRUE) {
if(is.null(log_base)) {
log_base <- 10
}
dose <- log_base ^ m
LCL <- log_base ^ LCL
UCL <- log_base ^ UCL
se_2 <- log_base ^ se_2
}
if (log_x == FALSE) {
dose <- m
LCL <- LCL
UCL <- UCL
se_2 <- se_2
}
if (long_output == TRUE) {
table <- tibble(p = p,
n = n,
dose = dose,
LCL = LCL,
UCL = UCL,
se = se_2,
chi_square = chi_square,
df = df,
pgof_sig = pgof,
h = het,
slope = b1,
slope_se = slope_se,
slope_sig = slope_sig,
intercept = b0,
intercept_se = intercept_se,
intercept_sig = intercept_sig,
z = z_value,
var_m = var_m,
covariance = cov_b0_b1)
}
if (long_output == FALSE) {
table <- tibble(p = p,
n = n,
dose = dose,
LCL = LCL,
UCL = UCL)
}
return(table)
}
Here is the data for ecotox::lamprey_tox
(the data I used in this example which is assigned to df
.
structure(list(nominal_dose = c(0, 0.7, 0.7, 0.7, 1.1, 1.1, 1.1,
1.3, 1.3, 1.3, 1.5, 1.5, 1.5, 1.7, 1.7, 1.7, 2, 2, 2, 0, 1.7,
1.7, 1.7, 2, 2, 2, 2.3, 2.3, 2.3, 2.5, 2.5, 2.5, 2.7, 2.7, 2.7,
3, 3, 3, 0, 2.7, 2.7, 3.3, 3.3, 3.7, 3.7, 4.1, 4.1, 4.5, 4.5,
5, 5, 0, 1.5, 1.5, 2, 2, 2.3, 2.3, 2.6, 2.6, 3, 3, 3.5, 3.5),
tank = c("S", "A", "L", "M", "B", "K", "N", "C", "I", "O",
"D", "J", "P", "E", "H", "Q", "F", "G", "R", "A", "B", "M",
"N", "C", "L", "O", "D", "K", "P", "E", "J", "Q", "F", "I",
"R", "G", "H", "S", "M", "A", "L", "B", "K", "C", "J", "D",
"I", "E", "H", "F", "G", "m", "A", "L", "B", "K", "C", "J",
"D", "I", "E", "H", "F", "G"), month = c("May", "May", "May",
"May", "May", "May", "May", "May", "May", "May", "May", "May",
"May", "May", "May", "May", "May", "May", "May", "June",
"June", "June", "June", "June", "June", "June", "June", "June",
"June", "June", "June", "June", "June", "June", "June", "June",
"June", "June", "August", "August", "August", "August", "August",
"August", "August", "August", "August", "August", "August",
"August", "August", "September", "September", "September",
"September", "September", "September", "September", "September",
"September", "September", "September", "September", "September"
), dose = c(0.19, 0.79, 0.8, 0.76, 1.36, 1.22, 1.18, 1.54,
1.44, 1.36, 1.69, 1.66, 1.6, 1.92, 1.86, 1.8, 2.2, 2.18,
2.1, 0.06, 1.67, 1.69, 1.67, 2.09, 2.1, 2.09, 2.37, 2.38,
2.39, 2.55, 2.59, 2.59, 2.68, 2.78, 2.8, 3.07, 3.09, 3.11,
0.04, 2.81, 2.78, 3.42, 3.42, 3.87, 3.84, 4.25, 4.26, 4.68,
4.69, 5.19, 5.22, 0.05, 1.45, 1.48, 1.99, 2.01, 2.29, 2.32,
2.62, 2.65, 3.09, 3.06, 3.59, 3.6), response = c(0, 0, 1,
0, 13, 10, 7, 16, 11, 17, 17, 19, 18, 22, 19, 18, 20, 20,
20, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 6, 4, 4, 11, 8, 10,
11, 9, 0, 0, 0, 2, 2, 2, 6, 9, 4, 10, 5, 10, 10, 0, 0, 0,
4, 3, 9, 6, 10, 8, 10, 10, 10, 10), survive = c(20, 20, 19,
20, 8, 9, 11, 4, 10, 3, 3, 1, 2, 0, 1, 2, 0, 0, 0, 10, 9,
10, 9, 11, 10, 10, 9, 10, 10, 10, 4, 5, 6, 0, 3, 0, 0, 1,
10, 10, 10, 8, 8, 8, 4, 1, 6, 0, 5, 0, 0, 0, 10, 10, 6, 7,
1, 4, 0, 2, 0, 0, 0, 0), total = c(20, 20, 20, 20, 21, 19,
18, 20, 21, 20, 20, 20, 20, 22, 20, 20, 20, 20, 20, 10, 9,
10, 9, 11, 10, 10, 10, 10, 10, 10, 10, 9, 10, 11, 11, 10,
11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -64L), spec = structure(list(
cols = list(nominal_dose = structure(list(), class = c("collector_double",
"collector")), tank = structure(list(), class = c("collector_character",
"collector")), month = structure(list(), class = c("collector_character",
"collector")), dose = structure(list(), class = c("collector_double",
"collector")), response = structure(list(), class = c("collector_double",
"collector")), survive = structure(list(), class = c("collector_double",
"collector")), total = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
Here is an expanded version of what I want to do. But eventually, I'll build it out more to be more flexible. But this shows the problem clearly. I am using Rstudio. I have seeing this in the viewing pane.
library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
data <- data%>% filter(month %in% c(months))
myplots <- list()
for (i in seq_along(months)){
a <- data[data$month %in% months[i],]
print(a)
lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
weights = total,
data =a)
LC50 <- lc$dose[1] #strip lc50
LC90 <- lc$dose[2] #strip lc90
p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
geom_point(size=4)+
geom_smooth(method="glm",
method.args=list(family=binomial(link="probit")),
aes(weight=total),colour="blue",se=TRUE, level=0.95) +
geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1)
myplots[[i]] <- p
}
return(list(myplots[[1]], myplots[[2]], myplots[[3]]))
}
This is what I get, which is not what I want
If I print p
in the loop, I get the expected results, but it takes away all flexibility of my function.
library(dplyr)
library(ggplot2)
library(ecotox)
df <- ecotox::lamprey_tox
lc<- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
data <- data%>% filter(month %in% c(months))
myplots <- list()
for (i in seq_along(months)){
a <- data[data$month %in% months[i],]
print(a)
lc <- LC_probit((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
weights = total,
data =a)
LC50 <- lc$dose[1] #strip lc50
LC90 <- lc$dose[2] #strip lc90
p <- ggplot(data=a,aes(x=log10(as.numeric(dose)),y=(response/total)))+
geom_point(size=4)+
geom_smooth(method="glm",
method.args=list(family=binomial(link="probit")),
aes(weight=total),colour="blue",se=TRUE, level=0.95) +
geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1)
print(p)
}
}
You can see that now the geom_smooth
and the geom_segment
layers line up as intended.
This is not a complete answer to your question. It is intended to show you how to convert your for
loop to an lapply
call. This will remove lazy evaluation as the root cause of your problem as the *apply
family of functions use forced evaluation, whereas for
loops use lazy evaluation.
As you're using the tidyverse
(and since, as I mentioned earlier, I don't have ecotox
), I'm using the starwars
data frame. Conversion to your use case should be straightforward.
lapply
takes (at least) two arguments. The first is a vector, the second a function. lapply
applies the function to each of the values in the vector in turn, passing the value from the vector to the function as its first argument. lapply
returns the results of the function calls in a list
.
I select a few species from those represented in the data and plot height against mass my species.
library(tidyverse)
lc <- function(d, speciesList) {
lapply(
speciesList,
function(x) {
d %>%
filter(species == x) %>%
ggplot() +
geom_point(aes(x = height, y = mass, colour = homeworld)) +
labs(title = paste0("Species: ", x))
}
)
}
starwars %>% lc(c("Human", "Droid"))
This gives
[[1]]
[[2]]
Warning messages:
1: Removed 15 rows containing missing values or values outside the scale range (`geom_point()`).
2: Removed 2 rows containing missing values or values outside the scale range (`geom_point()`).
in the console and two graphs in the Plots pane:
Of course, facet_wrap()
, facet_grid()
or group_walk()
would be more natural ways of doing this, but my purpose here is not to be natural but to explain how to use lapply
.
Edit
Following OP's edit to the question, here is some untested (as I don't have the ecotox
package and it is not obvious where the LC_probit_2
function comes from) code that converts OP's function to use lapply
. I am not convinced that the month
, total
and response
parameters to the lc
function are necessary.
lc <- function(data, dose='dose', months=c(), month='month', total='total', response='response'){
lapply(
seq_along(months),
function(i) {
a <- data[data$month %in% months[i],]
lc <- LC_probit_2((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
weights = total,
data = a)
LC50 <- lc$dose[1] #strip lc50
LC90 <- lc$dose[2] #strip lc90
ggplot(data = a, aes(x = log10(as.numeric(dose)), y = (response/total))) +
geom_point(size = 4) +
geom_smooth(
method="glm",
method.args=list(family = binomial(link = "probit")),
aes(weight = total), colour = "blue", se = TRUE, level = 0.95
) +
geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1)
}
)
}
If you want to adopt a more tidyverse-pure approach, the following may also be an option:
lc <- function(data, dose='dose', months=c()){
data %>%
filter(month %in% months) %>%
group_by(month) %>%
group_map(
function(.x, .y) {
lc <- LC_probit_2((response/ total) ~ log10(as.numeric(dose)), p = c(50, 90),
weights = total,
data = .x)
LC50 <- lc$dose[1] #strip lc50
LC90 <- lc$dose[2] #strip lc90
.x %>% ggplot(aes(x = log10(as.numeric(dose)), y = (response/total))) +
geom_point(size = 4) +
geom_smooth(
method="glm",
method.args=list(family = binomial(link = "probit")),
aes(weight = total), colour = "blue", se = TRUE, level = 0.95
) +
geom_segment(aes(x = log10(LC50), xend = log10(LC50), y=-Inf, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC50), y=0.5, yend=0.5), linetype='dashed', size=1) +
geom_segment(aes(x = log10(LC90), xend = log10(LC90), y=-Inf, yend=0.9), linetype='dashed', size=1) +
geom_segment(aes(x = -Inf, xend = log10(LC90), y=0.9, yend=0.9), linetype='dashed', size=1)
}
)
}