rintervalspredictionnls

R predFit(): Problem with prediction interval for nls in two variables


I am fitting a nonlinear regression on a dataset with two explanatory variables Day and Treatment modelling a response Amount using {nls}. I want to get the predicted values for Amount together with the 95%-prediction intervals for a new dataset. However, the problem also occurs if I am just re-using the same dataset. So I am including the example without setting the newdata-argument.

DS <- data.frame(Day=rep(1:5,t=2),
                 Amount=c(65,17,11,3.5,1.2,85,23,15,5,1.7),
                 Treatment=rep(c(0,1),e=5))

Model <- nls(Amount ~ (a+b*Treatment) * exp((c+d*Treatment)*Day) + e,
  data=DS, start = list(a=10,b=10,c=-0.1,d=-0.1,e=0.1))

Even though I can predict the Amount values for each data point with predict() as well as with investr::predFit(),

> predict(Model)
> [1] 64.814965 18.614433  7.150980  4.306624  3.600871 84.626103 25.804353  9.562934  5.078476  3.840261
> predFit(Model)
> [1] 64.814965 18.614433  7.150980  4.306624  3.600871 84.626103 25.804353  9.562934  5.078476  3.840261

I get the following error message when I try to include prediction intervals.

predFit(Model,interval="prediction",level=0.95)

Error in eval(form[[3]]) : object 'Day' not found In addition: Warning message: In assign(xname, newdata[, xname]) : only the first element is used as variable name

However, if I reduce the complexity of the model using only Day (no Treatment) in the formula in {nls}, everything works well and predFit() finds Day. What am I doing wrong?


Solution

  • The culprit is assign(xname, newdata[, xname]) in here.

    When there are two or more independent variables, the above code becomes assign(c('variable1','variable2'), newdata[, c('variable1','variable2')]). This does not make sense and the intended way is to assign them separately.

    All we need to do is to replace the culprit with

    for (i in 1:length(xname)) { 
          assign(xname[i], newdata[, xname[i]])  
        }
    

    The code is here:

    predFit.nls <- function(object, newdata, se.fit = FALSE,
                            interval = c("none", "confidence", "prediction"), 
                            level = 0.95, 
                            adjust = c("none", "Bonferroni", "Scheffe"), k, 
                            ...) {
      
      # Match arguments
      interval <- match.arg(interval)
      adjust <- match.arg(adjust)
      
      # Make sure se.fit is set to TRUE if intervals are requested
      compute.se.fit <- if (se.fit || (interval != "none")) {
        TRUE
      } else {
        FALSE
      }
      
      # No support for the Golub-Pereyra algorithm for partially linear 
      # least-squares models
      if (interval != "none") {
        if (!is.null(object$call$algorithm) && object$call$algorithm == "plinear") {
          stop(paste0("The Golub-Pereyra algorithm for partially linear least-", 
                      "squares models is currently not supported."), call. = FALSE)
        }
      }
      
      # Prediction data
      newdata <- if (missing(newdata)) {
        eval(stats::getCall(object)$data, envir = parent.frame()) 
      } else {
        as.data.frame(newdata) 
      }
      if (is.null(newdata)) {
        stop("No data available for predictions.", call. = FALSE)
      }
      
      # Name of independent variable
      xname <- intersect(all.vars(stats::formula(object)[[3]]), colnames(newdata)) 
      
      # Predicted values
      pred <- object$m$predict(newdata)
      
      # Compute standard error
      if (compute.se.fit) {
        
        # Assign values to parameter names in current environment
        param.names <- names(stats::coef(object))  
        for (i in 1:length(param.names)) { 
          assign(param.names[i], stats::coef(object)[i])  
        }
        
        # Assign values to independent variable name
        
        
        for (i in 1:length(xname)) { 
          assign(xname[i], newdata[, xname[i]])  
        }
        
        # Calculate gradient (numerically)
        form <- object$m$formula()
        rhs <- eval(form[[3]])
        if (is.null(attr(rhs, "gradient"))) {
          f0 <- attr(stats::numericDeriv(form[[3]], param.names), "gradient")
        } else {  # self start models should have gradient attribute
          f0 <- attr(rhs, "gradient")
        }
        
        # Calculate standard error
        R1 <- object$m$Rmat()
        # v0 <- diag(f0 %*% solve(t(R1) %*% R1) %*% t(f0))
        v0 <- diag(f0 %*% tcrossprod(solve(crossprod(R1)), f0))  # slightly faster
        se_fit <- sqrt(stats::sigma(object)^2 * v0)
        
      }
      
      # Compute results
      if (interval == "none") {
        
        # Vector of fitted/predicted values
        res <- pred    
        
      } else { 
        
        # Adjustment for simultaneous inference
        crit <- if (adjust == "Bonferroni") {  # Bonferroni adjustment 
          
          stats::qt((level + 2*k - 1) / (2*k), stats::df.residual(object))
          
        } else if (adjust == "Scheffe") {  # Scheffe adjustment
          
          if (interval == "confidence") {
            p <- length(stats::coef(object))  # number of regression parameters
            # sqrt(p * stats::qf((level + 1) / 2, p, stats::df.residual(object))) 
            sqrt(p * stats::qf(level, p, stats::df.residual(object))) 
          } else {
            # sqrt(k * stats::qf((level + 1) / 2, k, stats::df.residual(object))) 
            sqrt(k * stats::qf(level, k, stats::df.residual(object))) 
          }     
          
        } else {  # no adjustment   
          
          stats::qt((level + 1) / 2, stats::df.residual(object))   
          
        }
        
        # Interval calculations
        if (interval == "confidence") {  # confidence limits for mean response
          lwr <- pred - crit * se_fit  # lower limits
          upr <- pred + crit * se_fit  # upper limits
        } else {  # prediction limits for individual response
          lwr <- pred - crit * sqrt(stats::sigma(object)^2 + se_fit^2)  # lower limits
          upr <- pred + crit * sqrt(stats::sigma(object)^2 + se_fit^2)  # upper limits
        }
        
        # Store results in a matrix
        res <- cbind("fit" = pred, "lwr" = lwr, "upr" = upr)
        
      }
      
      # If standard errors of fitted values are requested, convert results to a list
      # and store additional information
      if (se.fit) {
        res <- list("fit" = if (interval != "none") res else pred, #res,
                    "se.fit" = se_fit,
                    "df" = stats::df.residual(object),
                    "residual.scale" = stats::sigma(object))
      }
      
      # Return results
      return(res)
      
    }