# data from Long (1997)
# see http://data.princeton.edu/wws509/stata/overdispersion.html for Stata analysis
data("PhdPubs", package="vcdExtra")
library(MASS)
library(countreg)
library(vcdExtra)
library(car)
library(lmtest) # for coeftest
hist(PhdPubs$articles, breaks=0:19, col="pink", xlim=c(0,20),
xlab="Number of Articles")
Fit the poisson model
phd.pois <- glm(articles ~ ., data=PhdPubs, family=poisson)
library(car)
Anova(phd.pois)
## Analysis of Deviance Table (Type II tests)
##
## Response: articles
## LR Chisq Df Pr(>Chisq)
## female 17.069 1 3.605e-05 ***
## married 6.633 1 0.01001 *
## kid5 22.102 1 2.586e-06 ***
## phdprestige 1.010 1 0.31500
## mentor 126.776 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
round(cbind(beta = coef(phd.pois),
expbeta = exp(coef(phd.pois)),
pct = 100 * (exp(coef(phd.pois)) - 1)), 3)
## beta expbeta pct
## (Intercept) 0.266 1.304 30.425
## female -0.224 0.799 -20.102
## married 0.157 1.170 17.037
## kid5 -0.185 0.831 -16.882
## phdprestige 0.025 1.026 2.570
## mentor 0.025 1.026 2.555
phd.pois1 <- update(phd.pois, . ~ .^2)
Anova(phd.pois1)
## Analysis of Deviance Table (Type II tests)
##
## Response: articles
## LR Chisq Df Pr(>Chisq)
## female 14.521 1 0.0001386 ***
## married 6.201 1 0.0127677 *
## kid5 19.545 1 9.826e-06 ***
## phdprestige 0.963 1 0.3265451
## mentor 128.112 1 < 2.2e-16 ***
## female:married 0.260 1 0.6099455
## female:kid5 0.120 1 0.7292877
## female:phdprestige 0.224 1 0.6357431
## female:mentor 0.012 1 0.9126026
## married:kid5 0
## married:phdprestige 1.706 1 0.1915350
## married:mentor 1.157 1 0.2820254
## kid5:phdprestige 0.164 1 0.6852251
## kid5:mentor 2.823 1 0.0929044 .
## phdprestige:mentor 3.810 1 0.0509367 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(phd.pois, phd.pois1, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: articles ~ female + married + kid5 + phdprestige + mentor
## Model 2: articles ~ female + married + kid5 + phdprestige + mentor + female:married +
## female:kid5 + female:phdprestige + female:mentor + married:kid5 +
## married:phdprestige + married:mentor + kid5:phdprestige +
## kid5:mentor + phdprestige:mentor
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 909 1633.6
## 2 900 1618.4 9 15.192 0.08578 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
LRstats(phd.pois, phd.pois1)
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313.3 3342.3 1633.6 909 < 2.2e-16 ***
## phd.pois1 3316.1 3388.4 1618.4 900 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## model diagnostics
op <- par(mfrow=c(1,2), mar=c(4,4,2,1)+.1)
plot(phd.pois, which=c(1,5))
par(op)
### component-plus-residual plot for non-linearity
crPlot(phd.pois, "mentor", pch=16, lwd=4, id = list(n=2))
## [1] 913 911 328 803
op <- par(mar=c(4,4,2,1)+.1)
crPlot(phd.pois, "mentor", pch=16, lwd=4, id = list(n=2),
cex.lab=1.4, xlab="mentor publications" )
## [1] 913 911 328 803
text(70, 2.5, paste("b =",round( coef(phd.pois)["mentor"], 3)), col="red", cex=1.5)
par(op)
### Influence plot
influencePlot(phd.pois, id = list(n=2))
## StudRes Hat CookD
## 328 -3.734244 0.193356161 0.36757530
## 803 -1.034076 0.086962320 0.01490683
## 913 5.382111 0.003560164 0.04339156
## 914 5.542302 0.009995654 0.10975550
## 915 5.150435 0.035504369 0.28331178
library(effects)
plot(allEffects(phd.pois))
mean and variance
with(PhdPubs, c(mean=mean(articles),
var=var(articles),
ratio=var(articles)/mean(articles)))
## mean var ratio
## 1.692896 3.709742 2.191358
estimate of phi = dispersion parameter
with(phd.pois, deviance/df.residual)
## [1] 1.797137
sum(residuals(phd.pois, type = "pearson")^2)/phd.pois$df.residual
## [1] 1.830368
Fit the quasi-poisson model
phd.qpois <- glm(articles ~ ., data=PhdPubs, family=quasipoisson)
Fit the negative-binomial model
phd.nbin <- glm.nb(articles ~ ., data=PhdPubs)
Estimate of theta
summary(phd.nbin)$theta
## [1] 2.266961
## compare models
LRstats(phd.pois, phd.qpois, phd.nbin)
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313.3 3342.3 1633.6 909 <2e-16 ***
## phd.qpois 909
## phd.nbin 3135.4 3169.1 1004.1 909 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## compare standard errors
phd.SE <- sqrt(cbind(
pois = diag(vcov(phd.pois)),
qpois = diag(vcov(phd.qpois)),
nbin = diag(vcov(phd.nbin))))
round(phd.SE, 4)
## pois qpois nbin
## (Intercept) 0.0996 0.1348 0.1327
## female 0.0546 0.0738 0.0726
## married 0.0613 0.0829 0.0819
## kid5 0.0401 0.0543 0.0528
## phdprestige 0.0253 0.0342 0.0343
## mentor 0.0020 0.0027 0.0032
phd.coef <- cbind(
pois = coef(phd.pois),
qpois = coef(phd.qpois),
nbin = coef(phd.nbin))
round(phd.coef,3)
## pois qpois nbin
## (Intercept) 0.266 0.266 0.213
## female -0.224 -0.224 -0.216
## married 0.157 0.157 0.153
## kid5 -0.185 -0.185 -0.176
## phdprestige 0.025 0.025 0.029
## mentor 0.025 0.025 0.029
round(cbind(beta = coef(phd.nbin),
expbeta = exp(coef(phd.nbin)),
pct = 100 * (exp(coef(phd.nbin)) - 1)), 3)
## beta expbeta pct
## (Intercept) 0.213 1.237 23.732
## female -0.216 0.806 -19.447
## married 0.153 1.165 16.508
## kid5 -0.176 0.838 -16.167
## phdprestige 0.029 1.030 2.978
## mentor 0.029 1.029 2.909
if (!require(countreg)) install.packages("countreg", repos="http://R-Forge.R-project.org")
# library(countreg)
plot(factor(articles==0) ~ mentor, data=PhdPubs,
ylevels=2:1, ylab="Zero articles",
breaks=quantile(mentor, probs=seq(0,1,.2)), cex.lab=1.25)
Fit ZI and hurdle models
phd.zip <- zeroinfl(articles ~ ., data=PhdPubs, dist="poisson")
phd.znb <- zeroinfl(articles ~ ., data=PhdPubs, dist="negbin")
phd.hp <- hurdle(articles ~ ., data=PhdPubs, dist="poisson")
phd.hnb <- hurdle(articles ~ ., data=PhdPubs, dist="negbin")
show coefficients
phd.zip
##
## Call:
## zeroinfl(formula = articles ~ ., data = PhdPubs, dist = "poisson")
##
## Count model coefficients (poisson with log link):
## (Intercept) female married kid5 phdprestige mentor
## 0.599179 -0.208790 0.106230 -0.142706 0.006998 0.017846
##
## Zero-inflation model coefficients (binomial with logit link):
## (Intercept) female married kid5 phdprestige mentor
## -0.563316 0.108162 -0.355580 0.219738 -0.005371 -0.133126
phd.hp
##
## Call:
## hurdle(formula = articles ~ ., data = PhdPubs, dist = "poisson")
##
## Count model coefficients (truncated poisson with log link):
## (Intercept) female married kid5 phdprestige mentor
## 0.63938 -0.22824 0.09899 -0.14173 -0.00279 0.01859
##
## Zero hurdle model coefficients (binomial with logit link):
## (Intercept) female married kid5 phdprestige mentor
## 0.17844 -0.25077 0.32769 -0.28542 0.04281 0.07916
LRstats(phd.pois, phd.nbin, phd.zip, phd.znb, phd.hp, phd.hnb,
sortby="BIC")
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313.3 3342.3 3301.3 909 < 2.2e-16 ***
## phd.hp 3234.5 3292.4 3210.5 903 < 2.2e-16 ***
## phd.zip 3233.5 3291.3 3209.5 903 < 2.2e-16 ***
## phd.hnb 3130.9 3193.5 3104.9 902 < 2.2e-16 ***
## phd.znb 3125.8 3188.4 3099.8 902 < 2.2e-16 ***
## phd.nbin 3135.4 3169.1 3121.4 909 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
NB: only mentor is important in the zero model
coeftest(phd.zip)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## count_(Intercept) 0.5991793 0.1186144 5.0515 5.305e-07 ***
## count_female -0.2087902 0.0635263 -3.2867 0.001053 **
## count_married 0.1062298 0.0709663 1.4969 0.134768
## count_kid5 -0.1427059 0.0474362 -3.0084 0.002699 **
## count_phdprestige 0.0069981 0.0298118 0.2347 0.814461
## count_mentor 0.0178457 0.0023337 7.6471 5.256e-14 ***
## zero_(Intercept) -0.5633164 0.4940503 -1.1402 0.254505
## zero_female 0.1081615 0.2817308 0.3839 0.701130
## zero_married -0.3555798 0.3179619 -1.1183 0.263732
## zero_kid5 0.2197383 0.1965801 1.1178 0.263948
## zero_phdprestige -0.0053714 0.1411827 -0.0380 0.969659
## zero_mentor -0.1331261 0.0464314 -2.8672 0.004238 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(phd.znb)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## count_(Intercept) 0.3654957 0.1398736 2.6130 0.009123 **
## count_female -0.1945997 0.0757710 -2.5683 0.010381 *
## count_married 0.1015024 0.0843342 1.2036 0.229070
## count_kid5 -0.1516356 0.0542427 -2.7955 0.005292 **
## count_phdprestige 0.0156126 0.0345164 0.4523 0.651145
## count_mentor 0.0243967 0.0034978 6.9749 5.919e-12 ***
## zero_(Intercept) -0.2942762 1.2827207 -0.2294 0.818598
## zero_female 0.6579945 0.8556879 0.7690 0.442115
## zero_married -1.4740768 0.9176084 -1.6064 0.108529
## zero_kid5 0.6241456 0.4410421 1.4152 0.157366
## zero_phdprestige -0.0112609 0.2888196 -0.0390 0.968907
## zero_mentor -0.8764770 0.3152265 -2.7805 0.005541 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
phd.zip1 <- zeroinfl(articles ~ .| mentor, data=PhdPubs, dist="poisson")
phd.znb1 <- zeroinfl(articles ~ .| mentor, data=PhdPubs, dist="negbin")
summary(phd.zip1)
##
## Call:
## zeroinfl(formula = articles ~ . | mentor, data = PhdPubs, dist = "poisson")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.2952 -0.8713 -0.2620 0.5400 7.2426
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.58819 0.10996 5.349 8.83e-08 ***
## female -0.21791 0.05879 -3.707 0.000210 ***
## married 0.13597 0.06602 2.059 0.039454 *
## kid5 -0.16262 0.04338 -3.749 0.000178 ***
## phdprestige 0.00671 0.02729 0.246 0.805737
## mentor 0.01804 0.00229 7.878 3.32e-15 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.68649 0.20592 -3.334 0.000857 ***
## mentor -0.13027 0.04037 -3.227 0.001250 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 16
## Log-likelihood: -1606 on 8 Df
LRstats(phd.pois, phd.nbin, phd.zip, phd.znb, phd.hp, phd.hnb, phd.zip1, phd.znb1,
sortby="BIC")
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313.3 3342.3 3301.3 909 < 2.2e-16 ***
## phd.hp 3234.5 3292.4 3210.5 903 < 2.2e-16 ***
## phd.zip 3233.5 3291.3 3209.5 903 < 2.2e-16 ***
## phd.zip1 3227.5 3266.0 3211.5 907 < 2.2e-16 ***
## phd.hnb 3130.9 3193.5 3104.9 902 < 2.2e-16 ***
## phd.znb 3125.8 3188.4 3099.8 902 < 2.2e-16 ***
## phd.nbin 3135.4 3169.1 3121.4 909 < 2.2e-16 ***
## phd.znb1 3124.3 3167.7 3106.3 906 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(allEffects(phd.nbin)[c(1,3,5)], rows=1, cols=3)
plot the zero model
phd.zero <- glm((articles==0) ~ mentor, data=PhdPubs, family=binomial)
plot(allEffects(phd.zero), main="Mentor effect on not publishing")