rglmtweedie

Package dglm in R


I am trying to fit a double glm in R using the dglm package. This is used in combination with the statmod package to use the tweedie model. A reproduction of the problem is:

library(dglm)
library(statmod)

p <- 1.5
y <- runif(10)
x <- runif(10)

dglm(y~x,~x,family=tweedie(link.power=0, var.power=p))
#doesnt work

dglm(y~x,~x,family=tweedie(link.power=0, var.power=1.5))
#works

var.power needs to be defined in a variable, since I want to use a loop where dglm runs on every entry of it


Solution

  • So, you can fix the problem by forcing dglm to evaluate the call where you input p. In the dglm function, on about line 73:

    if (family$family == "Tweedie") {
        tweedie.p <- call$family$var.power
    }
    

    should be:

    if (family$family == "Tweedie") {
        tweedie.p <- eval(call$family$var.power)
    }
    

    You can make your own function with the patch like this:

    dglm.nograpes <- function (formula = formula(data), dformula = ~1, family = gaussian, 
        dlink = "log", data = sys.parent(), subset = NULL, weights = NULL, 
        contrasts = NULL, method = "ml", mustart = NULL, betastart = NULL, 
        etastart = NULL, phistart = NULL, control = dglm.control(...), 
        ykeep = TRUE, xkeep = FALSE, zkeep = FALSE, ...) 
    {
        call <- match.call()
        if (is.character(family)) 
            family <- get(family, mode = "function", envir = parent.frame())
        if (is.function(family)) 
            family <- family()
        if (is.null(family$family)) {
            print(family)
            stop("'family' not recognized")
        }
        mnames <- c("", "formula", "data", "weights", "subset")
        cnames <- names(call)
        cnames <- cnames[match(mnames, cnames, 0)]
        mcall <- call[cnames]
        mcall[[1]] <- as.name("model.frame")
        mframe <<- eval(mcall, sys.parent())
        mf <- match.call(expand.dots = FALSE)
        y <- model.response(mframe, "numeric")
        if (is.null(dim(y))) {
            N <- length(y)
        }
        else {
            N <- dim(y)[1]
        }
        nobs <- N
        mterms <- attr(mframe, "terms")
        X <- model.matrix(mterms, mframe, contrasts)
        weights <- model.weights(mframe)
        if (is.null(weights)) 
            weights <- rep(1, N)
        if (is.null(weights)) 
            weights <- rep(1, N)
        if (!is.null(weights) && any(weights < 0)) {
            stop("negative weights not allowed")
        }
        offset <- model.offset(mframe)
        if (is.null(offset)) 
            offset <- rep(0, N)
        if (!is.null(offset) && length(offset) != NROW(y)) {
            stop(gettextf("number of offsets is %d should equal %d (number of observations)", 
                length(offset), NROW(y)), domain = NA)
        }
        mcall$formula <- formula
        mcall$formula[3] <- switch(match(length(dformula), c(0, 2, 
            3)), 1, dformula[2], dformula[3])
        mframe <- eval(mcall, sys.parent())
        dterms <- attr(mframe, "terms")
        Z <- model.matrix(dterms, mframe, contrasts)
        doffset <- model.extract(mframe, offset)
        if (is.null(doffset)) 
            doffset <- rep(0, N)
        name.dlink <- substitute(dlink)
        if (is.name(name.dlink)) {
            if (is.character(dlink)) {
                name.dlink <- dlink
            }
            else {
                dlink <- name.dlink <- as.character(name.dlink)
            }
        }
        else {
            if (is.call(name.dlink)) 
                name.dlink <- deparse(name.dlink)
        }
        if (!is.null(name.dlink)) 
            name.dlink <- name.dlink
        if (family$family == "Tweedie") {
            tweedie.p <- eval(call$family$var.power)
        }
        Digamma <- family$family == "Gamma" || (family$family == 
            "Tweedie" && tweedie.p == 2)
        if (Digamma) {
            linkinv <- make.link(name.dlink)$linkinv
            linkfun <- make.link(name.dlink)$linkfun
            mu.eta <- make.link(name.dlink)$mu.eta
            valid.eta <- make.link(name.dlink)$valid.eta
            init <- expression({
                if (any(y <= 0)) {
                    print(y)
                    print(any(y <= 0))
                    stop("non-positive values not allowed for the DM gamma family")
                }
                n <- rep.int(1, nobs)
                mustart <- y
            })
            dfamily <- structure(list(family = "Digamma", variance = varfun.digamma, 
                dev.resids = function(y, mu, wt) {
                    wt * unitdeviance.digamma(y, mu)
                }, aic = function(y, n, mu, wt, dev) NA, link = name.dlink, 
                linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, 
                initialize = init, validmu = function(mu) {
                    all(mu > 0)
                }, valideta = valid.eta))
        }
        else {
            eval(substitute(dfamily <- Gamma(link = lk), list(lk = name.dlink)))
        }
        dlink <- as.character(dfamily$link)
        logdlink <- dlink == "log"
        if (!is.null(call$method)) {
            name.method <- substitute(method)
            if (!is.character(name.method)) 
                name.method <- deparse(name.method)
            list.methods <- c("ml", "reml", "ML", "REML", "Ml", "Reml")
            i.method <- pmatch(method, list.methods, nomatch = 0)
            if (!i.method) 
                stop("Method must be ml or reml")
            method <- switch(i.method, "ml", "reml", "ml", "reml", 
                "ml", "reml")
        }
        reml <- method == "reml"
        if (is.null(mustart)) {
            etastart <- NULL
            eval(family$initialize)
            mu <- mustart
            mustart <- NULL
        }
        if (!is.null(betastart)) {
            eta <- X %*% betastart
            mu <- family$linkinv(eta + offset)
        }
        else {
            if (!is.null(mustart)) {
                mu <- mustart
                eta <- family$linkfun(mu) - offset
            }
            else {
                eta <- lm.fit(X, family$linkfun(mu) - offset, singular.ok = TRUE)$fitted.values
                mu <- family$linkinv(eta + offset)
            }
        }
        d <- family$dev.resids(y, mu, weights)
        if (!is.null(phistart)) {
            phi <- phistart
            deta <- dfamily$linkfun(phi) - doffset
        }
        else {
            deta <- lm.fit(Z, dfamily$linkfun(d + (d == 0)/6) - doffset, 
                singular.ok = TRUE)$fitted.values
            if (logdlink) 
                deta <- deta + 1.27036
            phi <- dfamily$linkinv(deta + offset)
        }
        if (any(phi <= 0)) {
            cat("Some values for  phi  are non-positive, suggesting an inappropriate model", 
                "Try a different link function.\n")
        }
        zm <- as.vector(eta + (y - mu)/family$mu.eta(eta))
        wm <- as.vector(eval(family$variance(mu)) * weights/phi)
        mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
        eta <- mfit$fitted.values
        mu <- family$linkinv(eta + offset)
        cat("family:", family$family, "\n")
        if (family$family == "Tweedie") {
            cat("p:", tweedie.p, "\n")
            if ((tweedie.p > 0) & (any(mu < 0))) {
                cat("Some values for  mu  are negative, suggesting an inappropriate model.", 
                    "Try a different link function.\n")
            }
        }
        d <- family$dev.resids(y, mu, weights)
        const <- dglm.constant(y, family, weights)
        if (Digamma) {
            h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
                weights/phi)
        }
        else {
            h <- log(phi/weights)
        }
        m2loglik <- const + sum(h + d/phi)
        if (reml) 
            m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
        m2loglikold <- m2loglik + 1
        epsilon <- control$epsilon
        maxit <- control$maxit
        trace <- control$trace
        iter <- 0
        while (abs(m2loglikold - m2loglik)/(abs(m2loglikold) + 1) > 
            epsilon && iter < maxit) {
            hdot <- 1/dfamily$mu.eta(deta)
            if (Digamma) {
                delta <- 2 * weights * (log(weights/phi) - digamma(weights/phi))
                u <- 2 * weights^2 * (trigamma(weights/phi) - phi/weights)
                fdot <- phi^2/u * hdot
            }
            else {
                delta <- phi
                u <- phi^2
                fdot <- hdot
            }
            wd <- 1/(fdot^2 * u)
            if (reml) {
                h <- hat(mfit$qr)
                delta <- delta - phi * h
                wd <- wd - 2 * (h/hdot^2/phi^2) + h^2
            }
            if (any(wd < 0)) {
                cat(" Some weights are negative; temporarily fixing.  This may be a sign of an inappropriate model.\n")
                wd[wd < 0] <- 0
            }
            if (any(is.infinite(wd))) {
                cat(" Some weights are negative; temporarily fixing.  This may be a sign of an inappropriate model.\n")
                wd[is.infinite(wd)] <- 100
            }
            zd <- deta + (d - delta) * fdot
            dfit <- lm.wfit(Z, zd, wd, method = "qr", singular.ok = TRUE)
            deta <- dfit$fitted.values
            phi <- dfamily$linkinv(deta + doffset)
            if (any(is.infinite(phi))) {
                cat("*** Some values for  phi  are infinite, suggesting an inappropriate model", 
                    "Try a different link function.  Making an attempt to continue...\n")
                phi[is.infinite(phi)] <- 10
            }
            zm <- eta + (y - mu)/family$mu.eta(eta)
            fam.wt <- expression(weights * family$variance(mu))
            wm <- eval(fam.wt)/phi
            mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE)
            eta <- mfit$fitted.values
            mu <- family$linkinv(eta + offset)
            if (family$family == "Tweedie") {
                if ((tweedie.p > 0) & (any(mu < 0))) {
                    cat("*** Some values for  mu  are negative, suggesting an inappropriate model.", 
                      "Try a different link function.  Making an attempt to continue...\n")
                    mu[mu <= 0] <- 1
                }
            }
            d <- family$dev.resids(y, mu, weights)
            m2loglikold <- m2loglik
            if (Digamma) {
                h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
                    weights/phi)
            }
            else {
                h <- log(phi/weights)
            }
            m2loglik <- const + sum(h + d/phi)
            if (reml) {
                m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R))))
            }
            iter <- iter + 1
            if (trace) 
                cat("DGLM iteration ", iter, ": -2*log-likelihood = ", 
                    format(round(m2loglik, 4)), " \n", sep = "")
        }
        mfit$formula <- call$formula
        mfit$call <- call
        mfit$family <- family
        mfit$linear.predictors <- mfit$fitted.values + offset
        mfit$fitted.values <- mu
        mfit$prior.weights <- weights
        mfit$terms <- mterms
        mfit$contrasts <- attr(X, "contrasts")
        intercept <- attr(mterms, "intercept")
        mfit$df.null <- N - sum(weights == 0) - as.integer(intercept)
        mfit$call <- call
        mfit$deviance <- sum(d/phi)
        mfit$aic <- NA
        mfit$null.deviance <- glm.fit(x = X, y = y, weights = weights/phi, 
            offset = offset, family = family)
        if (length(mfit$null.deviance) > 1) 
            mfit$null.deviance <- mfit$null.deviance$null.deviance
        if (ykeep) 
            mfit$y <- y
        if (xkeep) 
            mfit$x <- X
        class(mfit) <- c("glm", "lm")
        dfit$family <- dfamily
        dfit$prior.weights <- rep(1, N)
        dfit$linear.predictors <- dfit$fitted.values + doffset
        dfit$fitted.values <- phi
        dfit$terms <- dterms
        dfit$aic <- NA
        call$formula <- call$dformula
        call$dformula <- NULL
        call$family <- call(dfamily$family, link = name.dlink)
        dfit$call <- call
        dfit$residuals <- dfamily$dev.resid(d, phi, wt = rep(1/2, 
            N))
        dfit$deviance <- sum(dfit$residuals)
        dfit$null.deviance <- glm.fit(x = Z, y = d, weights = rep(1/2, 
            N), offset = doffset, family = dfamily)
        if (length(dfit$null.deviance) > 1) 
            dfit$null.deviance <- dfit$null.deviance$null.deviance
        if (ykeep) 
            dfit$y <- d
        if (zkeep) 
            dfit$z <- Z
        dfit$formula <- as.vector(attr(dterms, "formula"))
        dfit$iter <- iter
        class(dfit) <- c("glm", "lm")
        out <- c(mfit, list(dispersion.fit = dfit, iter = iter, method = method, 
            m2loglik = m2loglik))
        class(out) <- c("dglm", "glm", "lm")
        out
    }
    

    And then run it like this:

    dglm.nograpes(y~x,~x,family=tweedie(link.power=0, var.power=p))