Skip to content

Instantly share code, notes, and snippets.

@gavinsimpson
Last active January 1, 2026 11:11
Show Gist options
  • Select an option

  • Save gavinsimpson/8a0f0e072b095295cf5f7af2762e05a7 to your computer and use it in GitHub Desktop.

Select an option

Save gavinsimpson/8a0f0e072b095295cf5f7af2762e05a7 to your computer and use it in GitHub Desktop.
A quick R script I knocked up to compare the glmmTMB and mgcv packages for fitting zero-inflated GLMMs to the Salamander and Owls data sets from Brooks et al (2017)
## Compare Brooks et al glmmTMB paper with mgcv
## Packages
library("glmmTMB")
library("mgcv")
library("ggplot2")
theme_set(theme_bw())
library("ggstance")
## Salamander
data(Salamanders, package = "glmmTMB")
## Poisson Models
pgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML")
pm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = poisson)
pm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = poisson)
pm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = poisson)
AIC(pgam0, pgam1, pgam2)
AIC(pm0, pm1, pm2)
## Negative binomial models
nbgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML")
nbm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = nbinom2)
nbm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = nbinom2)
nbm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = nbinom2)
AIC(nbgam0, nbgam1, nbgam2)
AIC(nbm0, nbm1, nbm2)
## Zero-inflated Poisson
## mgcv's ziplss can only fit using REML
zipgam0 <- gam(list(count ~ spp + s(site, bs = "re"), ~ spp),
data = Salamanders, family = ziplss, method = "REML")
zipgam1 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp),
data = Salamanders, family = ziplss, method = "REML")
zipgam2 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp + mined),
data = Salamanders, family = ziplss, method = "REML")
zipgam3 <- gam(list(count ~ spp * mined + s(site, bs = "re"), ~ spp * mined),
data = Salamanders, family = ziplss, method = "REML")
## check the things converged
zipgam0$outer.info
zipgam1$outer.info
zipgam2$outer.info
zipgam3$outer.info
zipm0 <- glmmTMB(count ~ spp + (1 | site), zi = ~ spp, data = Salamanders, family = poisson)
zipm1 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp, data = Salamanders, family = poisson)
zipm2 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp + mined, data = Salamanders, family = poisson)
zipm3 <- glmmTMB(count ~ spp * mined + (1 | site), zi = ~ spp * mined, data = Salamanders, family = poisson)
AIC(zipgam0, zipgam1, zipgam2, zipgam3)
AIC(zipm0, zipm1, zipm2, zipm3)
## Newdata
newd0 <- newd <- as.data.frame(cbind(unique(Salamanders[, c("mined","spp")]), site = "R -1"))
rownames(newd0) <- rownames(newd) <- NULL
pred <- predict(zipgam3, newd, exclude = "s(site)", type = "link")
beta <- coef(zipgam3)
consts <- beta[grep("Intercept", names(beta))]
ilink <- function(eta) {
## from stats::binomial(link = cloglog)$linkinv
pmax(pmin(-expm1(-exp(eta)), 1 - .Machine$double.eps), .Machine$double.eps)
}
newd <- transform(newd, fitted = exp(pred[,1]) * ilink(pred[,2]))
ggplot(newd, aes(x = spp, y = fitted, colour = mined)) +
geom_point()
## Owls
data(Owls, package = "glmmTMB")
names(Owls) <- sub("SiblingNegotiation", "NCalls", names(Owls))
Owls <- transform(Owls, cArrivalTime = ArrivalTime - mean(ArrivalTime))
### constant zero-inflation
system.time({m1.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest),
ziformula = ~ 1, data = Owls, family = poisson)})
## mgcv's ziplss can only fit using REML
system.time({m1.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"),
~ 1), data = Owls, family = ziplss(), method = "REML")})
createCoeftab <- function(TMB, GAM) {
bTMB <- fixef(TMB)$cond[-1]
bGAM <- coef(GAM)[2:6]
seTMB <- diag(vcov(TMB)$cond)[-1]
seGAM <- diag(vcov(GAM))[2:6]
nms <- names(bTMB)
nms <- sub("FoodTreatment", "FT", nms)
nms <- sub("cArrivalTime", "ArrivalTime", nms)
df <- data.frame(model = rep(c("glmmTMB", "mgcv::gam"), each = 5),
term = rep(nms, 2),
estimate = unname(c(bTMB, bGAM)))
df <- transform(df,
upper = estimate + sqrt(c(seTMB, seGAM)),
lower = estimate - sqrt(c(seTMB, seGAM)))
df
}
m1.coefs <- createCoeftab(m1.tmb, m1.gam)
p1 <- ggplot(m1.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) +
geom_pointrangeh(position = position_dodgev(height = 0.3)) +
labs(y = NULL,
x = "Regression estimate",
title = "Comparing mgcv with glmmTMB",
subtitle = "Owls: ZIP with constant zero-inflation",
caption = "Bars are ±1 SE")
### Complex zero-inflation
system.time({m2.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest),
ziformula = ~ FoodTreatment + (1 | Nest), data = Owls, family = poisson)})
## mgcv's ziplss can only fit using REML
system.time({m2.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"),
~ FoodTreatment + s(Nest, bs = "re")), data = Owls, family = ziplss(), method = "REML")})
m2.coefs <- createCoeftab(m2.tmb, m2.gam)
p2 <- ggplot(m2.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) +
geom_pointrangeh(position = position_dodgev(height = 0.3)) +
labs(y = NULL,
x = "Regression estimate",
title = "Comparing mgcv with glmmTMB",
subtitle = "Owls: ZIP with complex zero-inflation",
caption = "Bars are ±1 SE")
ggsave("~/Downloads/owls-comparison-simple-zip.png", p1, width = 7, height = 7, dpi = 150)
ggsave("~/Downloads/owls-comparison-complex-zip.png", p2, width = 7, height = 7, dpi = 150)
@ashander
Copy link

ashander commented May 3, 2017

here's an idea for how to try out bigger datasets:

Salamanders$original_site = as.character(Salamanders$site)                                                                                                                                                         
                                                                                                                                                                                                                   
big_sals <- lapply(1:100, function(i) {                                                                                                                                                                            
               Salamanders$site = paste0(Salamanders$original_site, i)                                                                                                                                             
               Salamanders})                                                                                                                                                                                       
Salamanders <- do.call(rbind, big_sals)                                                                                                                                                                            
Salamanders$site <- factor(Salamanders$site)            

@mebrooks
Copy link

mebrooks commented May 3, 2017

Here's the code for the benchmarking in the glmmTMB manuscript. I simulated the datasets for figures A.1 and A.3 using simulate.glmmTMB. By default, the function resamples from the random effect distribution, but we might make that optional in the future and allow conditioning on the estimated site effects.

@bbolker
Copy link

bbolker commented May 3, 2017

Just to clarify: as far as I can tell from ?ziplss, mgcv can do ZIP but not ZINB ... right?

@gavinsimpson
Copy link
Author

gavinsimpson commented May 3, 2017

@bbolker Yes. mgcv currently has:

  1. nb() for negative binomial with theta estimated (like lme4::glmm.nb())
  2. Zip() for zero-inflated (hurdle) Poisson with simple intercept-only model for zero-inflation
  3. ziplss() for zero-inflated (hurdle) Poisson with linear predictors for the Poisson and zero-inflation parts.

NB + dispersion, ZINB, and hurdle models true ZIP models are not supported.

Also, I think for ziplss(), method = "ML" is ignored and hence for those models fitting is via Laplace approximate REML only (see ?mgcv::family.mgcv`).

@mebrooks
Copy link

mebrooks commented May 5, 2017

Thanks for letting us know about this. I'll add mgcv to the next draft of the manuscript.

I added mgcv to the negative binomial timing comparisons. It's definitely faster for the original data structure, but not with many more random effect levels. I'm guessing that TMB's automatic sparseness detection gives it the advantage.

@fhui28
Copy link

fhui28 commented Jan 1, 2026

Hi all,

Apologies for revising this very old conversation, and I am not sure this is the right forum to do this. But seeing as some of the "big guns" of mgcv and glmmTMB are in this dialogue, then I might as well...

*Note this does not affect the glmmTMB paper https://journal.r-project.org/articles/RJ-2017-066/ as I do not think this ended up with ZIP models.

One of the comparisons made above was between ZIP models fitted using mgcv versus glmmTMB. But I don't believe this is a apples-vs-apples comparions because technically (you can correct me if I am wrong @gavinsimpson !), ziplss in mgcv fits a hurdle Poisson and not a zero-inflated Poisson. The name is confusing, and personally I do not think Simon's documentation for ziplss makes it clear either, but I am pretty sure it fits a hurdle and not a zero-inflated.

One empirical way we can verify this is to fit models to a full count dataset that has zeros, and then fit the same model to a subset containing only non-zeros. Under a ZIP model the estimates for the Poisson part will differ, but under a hurdle Poisson model the estimates for the Poisson part should remain the same.

## Salamander
data(Salamanders, package = "glmmTMB")

## Actual ZIP model
zipm0 <- glmmTMB(count ~ spp * mined, zi = ~ 1, data = Salamanders, family = poisson)
zipm01 <- glmmTMB(count ~ spp * mined, zi = ~ 1, data = Salamanders[Salamanders$count > 0,], family = poisson)
fixef(zipm0) 
fixef(zipm01)
#' coefs change between the two count component models because both components of a ZIP are sensitive to the zeros


## mgcv's ziplss 
zipgam0 <- gam(list(count ~ spp * mined, ~ 1), data = Salamanders, family = ziplss, method = "REML")
zipgam01 <- gam(list(count ~ spp * mined, ~ 1), data = Salamanders[Salamanders$count > 0,], family = ziplss, method = "REML")
zipgam0$coefficients
zipgam01$coefficients
#' coefs *do not* change between the two count component models because both the count component of a hurdle Poisson only depends on the non-zeros. The zero component intercept obvious changes.

Hopefully I am not going crazy!

@gavinsimpson
Copy link
Author

You are quite right @fhui28; it’s zero-inflated through the hurdle only. I don’t think this was at all clear from the original help page for this family which only more recently had it mentioned “hurdle”.

@fhui28
Copy link

fhui28 commented Jan 1, 2026

Thanks @gavinsimpson,

Personally I still don't think it is that clear on the current help file page for ziplsss -- much of the general stats literature as well as the species distribution modeling literature think of zero-inflated models and hurdle models as two distinct classes of models, rather than the latter being a special case of the former.

If the maths was written out a little more on the help file that would, well, help. But whatever...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment