Last active
January 1, 2026 11:11
-
-
Save gavinsimpson/8a0f0e072b095295cf5f7af2762e05a7 to your computer and use it in GitHub Desktop.
A quick R script I knocked up to compare the glmmTMB and mgcv packages for fitting zero-inflated GLMMs to the Salamander and Owls data sets from Brooks et al (2017)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## Compare Brooks et al glmmTMB paper with mgcv | |
| ## Packages | |
| library("glmmTMB") | |
| library("mgcv") | |
| library("ggplot2") | |
| theme_set(theme_bw()) | |
| library("ggstance") | |
| ## Salamander | |
| data(Salamanders, package = "glmmTMB") | |
| ## Poisson Models | |
| pgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
| pgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
| pgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
| pm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = poisson) | |
| pm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = poisson) | |
| pm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = poisson) | |
| AIC(pgam0, pgam1, pgam2) | |
| AIC(pm0, pm1, pm2) | |
| ## Negative binomial models | |
| nbgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
| nbgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
| nbgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
| nbm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = nbinom2) | |
| nbm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = nbinom2) | |
| nbm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = nbinom2) | |
| AIC(nbgam0, nbgam1, nbgam2) | |
| AIC(nbm0, nbm1, nbm2) | |
| ## Zero-inflated Poisson | |
| ## mgcv's ziplss can only fit using REML | |
| zipgam0 <- gam(list(count ~ spp + s(site, bs = "re"), ~ spp), | |
| data = Salamanders, family = ziplss, method = "REML") | |
| zipgam1 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp), | |
| data = Salamanders, family = ziplss, method = "REML") | |
| zipgam2 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp + mined), | |
| data = Salamanders, family = ziplss, method = "REML") | |
| zipgam3 <- gam(list(count ~ spp * mined + s(site, bs = "re"), ~ spp * mined), | |
| data = Salamanders, family = ziplss, method = "REML") | |
| ## check the things converged | |
| zipgam0$outer.info | |
| zipgam1$outer.info | |
| zipgam2$outer.info | |
| zipgam3$outer.info | |
| zipm0 <- glmmTMB(count ~ spp + (1 | site), zi = ~ spp, data = Salamanders, family = poisson) | |
| zipm1 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp, data = Salamanders, family = poisson) | |
| zipm2 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp + mined, data = Salamanders, family = poisson) | |
| zipm3 <- glmmTMB(count ~ spp * mined + (1 | site), zi = ~ spp * mined, data = Salamanders, family = poisson) | |
| AIC(zipgam0, zipgam1, zipgam2, zipgam3) | |
| AIC(zipm0, zipm1, zipm2, zipm3) | |
| ## Newdata | |
| newd0 <- newd <- as.data.frame(cbind(unique(Salamanders[, c("mined","spp")]), site = "R -1")) | |
| rownames(newd0) <- rownames(newd) <- NULL | |
| pred <- predict(zipgam3, newd, exclude = "s(site)", type = "link") | |
| beta <- coef(zipgam3) | |
| consts <- beta[grep("Intercept", names(beta))] | |
| ilink <- function(eta) { | |
| ## from stats::binomial(link = cloglog)$linkinv | |
| pmax(pmin(-expm1(-exp(eta)), 1 - .Machine$double.eps), .Machine$double.eps) | |
| } | |
| newd <- transform(newd, fitted = exp(pred[,1]) * ilink(pred[,2])) | |
| ggplot(newd, aes(x = spp, y = fitted, colour = mined)) + | |
| geom_point() | |
| ## Owls | |
| data(Owls, package = "glmmTMB") | |
| names(Owls) <- sub("SiblingNegotiation", "NCalls", names(Owls)) | |
| Owls <- transform(Owls, cArrivalTime = ArrivalTime - mean(ArrivalTime)) | |
| ### constant zero-inflation | |
| system.time({m1.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest), | |
| ziformula = ~ 1, data = Owls, family = poisson)}) | |
| ## mgcv's ziplss can only fit using REML | |
| system.time({m1.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"), | |
| ~ 1), data = Owls, family = ziplss(), method = "REML")}) | |
| createCoeftab <- function(TMB, GAM) { | |
| bTMB <- fixef(TMB)$cond[-1] | |
| bGAM <- coef(GAM)[2:6] | |
| seTMB <- diag(vcov(TMB)$cond)[-1] | |
| seGAM <- diag(vcov(GAM))[2:6] | |
| nms <- names(bTMB) | |
| nms <- sub("FoodTreatment", "FT", nms) | |
| nms <- sub("cArrivalTime", "ArrivalTime", nms) | |
| df <- data.frame(model = rep(c("glmmTMB", "mgcv::gam"), each = 5), | |
| term = rep(nms, 2), | |
| estimate = unname(c(bTMB, bGAM))) | |
| df <- transform(df, | |
| upper = estimate + sqrt(c(seTMB, seGAM)), | |
| lower = estimate - sqrt(c(seTMB, seGAM))) | |
| df | |
| } | |
| m1.coefs <- createCoeftab(m1.tmb, m1.gam) | |
| p1 <- ggplot(m1.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) + | |
| geom_pointrangeh(position = position_dodgev(height = 0.3)) + | |
| labs(y = NULL, | |
| x = "Regression estimate", | |
| title = "Comparing mgcv with glmmTMB", | |
| subtitle = "Owls: ZIP with constant zero-inflation", | |
| caption = "Bars are ±1 SE") | |
| ### Complex zero-inflation | |
| system.time({m2.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest), | |
| ziformula = ~ FoodTreatment + (1 | Nest), data = Owls, family = poisson)}) | |
| ## mgcv's ziplss can only fit using REML | |
| system.time({m2.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"), | |
| ~ FoodTreatment + s(Nest, bs = "re")), data = Owls, family = ziplss(), method = "REML")}) | |
| m2.coefs <- createCoeftab(m2.tmb, m2.gam) | |
| p2 <- ggplot(m2.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) + | |
| geom_pointrangeh(position = position_dodgev(height = 0.3)) + | |
| labs(y = NULL, | |
| x = "Regression estimate", | |
| title = "Comparing mgcv with glmmTMB", | |
| subtitle = "Owls: ZIP with complex zero-inflation", | |
| caption = "Bars are ±1 SE") | |
| ggsave("~/Downloads/owls-comparison-simple-zip.png", p1, width = 7, height = 7, dpi = 150) | |
| ggsave("~/Downloads/owls-comparison-complex-zip.png", p2, width = 7, height = 7, dpi = 150) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks @gavinsimpson,
Personally I still don't think it is that clear on the current help file page for
ziplsss-- much of the general stats literature as well as the species distribution modeling literature think of zero-inflated models and hurdle models as two distinct classes of models, rather than the latter being a special case of the former.If the maths was written out a little more on the help file that would, well, help. But whatever...