I am fitting a nonlinear regression on a dataset with two explanatory variables, Day and Treatment, modelling a response Amount with nls(). I want to get the predicted values of Amount together with their 95% prediction intervals for a new dataset. However, the problem also occurs if I simply re-use the original dataset, so the example below does not set the newdata argument.
DS <- data.frame(Day = rep(1:5, t = 2),
                 Amount = c(65, 17, 11, 3.5, 1.2, 85, 23, 15, 5, 1.7),
                 Treatment = rep(c(0, 1), e = 5))

Model <- nls(Amount ~ (a + b*Treatment) * exp((c + d*Treatment)*Day) + e,
             data = DS, start = list(a = 10, b = 10, c = -0.1, d = -0.1, e = 0.1))
Even though I can predict the Amount values for each data point with predict() as well as with investr::predFit(),
> predict(Model)
 [1] 64.814965 18.614433  7.150980  4.306624  3.600871 84.626103 25.804353  9.562934  5.078476  3.840261
> predFit(Model)
 [1] 64.814965 18.614433  7.150980  4.306624  3.600871 84.626103 25.804353  9.562934  5.078476  3.840261
I get the following error message when I try to include prediction intervals:

predFit(Model, interval = "prediction", level = 0.95)
Error in eval(form[[3]]) : object 'Day' not found
In addition: Warning message:
In assign(xname, newdata[, xname]) :
  only the first element is used as variable name
However, if I reduce the complexity of the model by using only Day (no Treatment) in the nls() formula, everything works fine and predFit() finds Day. What am I doing wrong?
The culprit is assign(xname, newdata[, xname]) inside investr's predFit method for nls objects (the call the warning points at).
When there are two or more independent variables, xname is a character vector, so that call effectively becomes assign(c('variable1','variable2'), newdata[, c('variable1','variable2')]). assign() only uses the first element as the variable name (exactly the warning you got), so the remaining variables are never created. In your model xname comes out as c('Treatment', 'Day'); Treatment is assigned but Day is not, hence "object 'Day' not found". The variables need to be assigned separately.
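A minimal demonstration of that assign() behaviour; the data frame d and the names x1/x2 are just placeholders:

d <- data.frame(x1 = 1:3, x2 = 4:6)
assign(c("x1", "x2"), d[, c("x1", "x2")])
# Warning message: only the first element is used as variable name
exists("x1")  # TRUE  -- only the first name was created
exists("x2")  # FALSE (in a fresh session) -- the second variable never gets assigned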
All we need to do is replace that single line with a loop over the variable names:

for (i in 1:length(xname)) {
  assign(xname[i], newdata[, xname[i]])
}
The full function with this fix applied:
predFit.nls <- function(object, newdata, se.fit = FALSE,
                        interval = c("none", "confidence", "prediction"),
                        level = 0.95,
                        adjust = c("none", "Bonferroni", "Scheffe"), k,
                        ...) {
  # Match arguments
  interval <- match.arg(interval)
  adjust <- match.arg(adjust)
  # Make sure se.fit is set to TRUE if intervals are requested
  compute.se.fit <- if (se.fit || (interval != "none")) {
    TRUE
  } else {
    FALSE
  }
  # No support for the Golub-Pereyra algorithm for partially linear
  # least-squares models
  if (interval != "none") {
    if (!is.null(object$call$algorithm) && object$call$algorithm == "plinear") {
      stop(paste0("The Golub-Pereyra algorithm for partially linear least-",
                  "squares models is currently not supported."), call. = FALSE)
    }
  }
  # Prediction data
  newdata <- if (missing(newdata)) {
    eval(stats::getCall(object)$data, envir = parent.frame())
  } else {
    as.data.frame(newdata)
  }
  if (is.null(newdata)) {
    stop("No data available for predictions.", call. = FALSE)
  }
  # Names of the independent variables present in newdata
  xname <- intersect(all.vars(stats::formula(object)[[3]]), colnames(newdata))
  # Predicted values
  pred <- object$m$predict(newdata)
  # Compute standard error
  if (compute.se.fit) {
    # Assign values to parameter names in current environment
    param.names <- names(stats::coef(object))
    for (i in 1:length(param.names)) {
      assign(param.names[i], stats::coef(object)[i])
    }
    # Assign values to each independent variable name (the patched loop)
    for (i in 1:length(xname)) {
      assign(xname[i], newdata[, xname[i]])
    }
    # Calculate gradient (numerically)
    form <- object$m$formula()
    rhs <- eval(form[[3]])
    if (is.null(attr(rhs, "gradient"))) {
      f0 <- attr(stats::numericDeriv(form[[3]], param.names), "gradient")
    } else {  # self start models should have gradient attribute
      f0 <- attr(rhs, "gradient")
    }
    # Calculate standard error
    R1 <- object$m$Rmat()
    # v0 <- diag(f0 %*% solve(t(R1) %*% R1) %*% t(f0))
    v0 <- diag(f0 %*% tcrossprod(solve(crossprod(R1)), f0))  # slightly faster
    se_fit <- sqrt(stats::sigma(object)^2 * v0)
  }
  # Compute results
  if (interval == "none") {
    # Vector of fitted/predicted values
    res <- pred
  } else {
    # Adjustment for simultaneous inference
    crit <- if (adjust == "Bonferroni") {  # Bonferroni adjustment
      stats::qt((level + 2*k - 1) / (2*k), stats::df.residual(object))
    } else if (adjust == "Scheffe") {  # Scheffe adjustment
      if (interval == "confidence") {
        p <- length(stats::coef(object))  # number of regression parameters
        # sqrt(p * stats::qf((level + 1) / 2, p, stats::df.residual(object)))
        sqrt(p * stats::qf(level, p, stats::df.residual(object)))
      } else {
        # sqrt(k * stats::qf((level + 1) / 2, k, stats::df.residual(object)))
        sqrt(k * stats::qf(level, k, stats::df.residual(object)))
      }
    } else {  # no adjustment
      stats::qt((level + 1) / 2, stats::df.residual(object))
    }
    # Interval calculations
    if (interval == "confidence") {  # confidence limits for mean response
      lwr <- pred - crit * se_fit  # lower limits
      upr <- pred + crit * se_fit  # upper limits
    } else {  # prediction limits for individual response
      lwr <- pred - crit * sqrt(stats::sigma(object)^2 + se_fit^2)  # lower limits
      upr <- pred + crit * sqrt(stats::sigma(object)^2 + se_fit^2)  # upper limits
    }
    # Store results in a matrix
    res <- cbind("fit" = pred, "lwr" = lwr, "upr" = upr)
  }
  # If standard errors of fitted values are requested, convert results to a list
  # and store additional information
  if (se.fit) {
    res <- list("fit" = if (interval != "none") res else pred,  # res,
                "se.fit" = se_fit,
                "df" = stats::df.residual(object),
                "residual.scale" = stats::sigma(object))
  }
  # Return results
  return(res)
}
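After sourcing this patched predFit.nls() into the global environment, S3 dispatch should pick it up ahead of the method shipped with investr, so the calls from the question work as intended. A quick usage sketch on the model above (NewDays is just an illustrative name for a prediction grid):

library(investr)

# Prediction intervals for the original data
predFit(Model, interval = "prediction", level = 0.95)

# Prediction intervals for new data: a finer Day grid for both treatments
NewDays <- expand.grid(Day = seq(1, 5, by = 0.5), Treatment = c(0, 1))
predFit(Model, newdata = NewDays, interval = "prediction", level = 0.95)

Both calls should now return a matrix with columns fit, lwr and upr instead of the "object 'Day' not found" error.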