library(vcdExtra)
library(MASS)
data("UCBAdmissions")
structable(Dept ~ Admit+Gender, UCBAdmissions)
## Dept A B C D E F
## Admit Gender
## Admitted Male 512 353 120 138 53 22
## Female 89 17 202 131 94 24
## Rejected Male 313 207 205 279 138 351
## Female 19 8 391 244 299 317
berk.mod0 <- loglm(~ Dept + Gender + Admit, data=UCBAdmissions)
berk.mod0
## Call:
## loglm(formula = ~Dept + Gender + Admit, data = UCBAdmissions)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 2097.671 16 0
## Pearson 2000.328 16 0
mosaic(berk.mod0, gp=shading_Friendly)
berk.mod1 <- loglm(~ Dept * (Gender + Admit), data=UCBAdmissions)
berk.mod1
## Call:
## loglm(formula = ~Dept * (Gender + Admit), data = UCBAdmissions)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 21.73551 6 0.001351993
## Pearson 19.93841 6 0.002840164
mosaic(berk.mod1, gp=shading_Friendly)
berk.mod2 <-loglm(~(Admit+Dept+Gender)^2, data=UCBAdmissions)
berk.mod2
## Call:
## loglm(formula = ~(Admit + Dept + Gender)^2, data = UCBAdmissions)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 20.20429 5 0.001144069
## Pearson 18.82255 5 0.002074020
mosaic(berk.mod2, gp=shading_Friendly)
anova(berk.mod0, berk.mod1, berk.mod2, test="Chisq")
## LR tests for hierarchical log-linear models
##
## Model 1:
## ~Dept + Gender + Admit
## Model 2:
## ~Dept * (Gender + Admit)
## Model 3:
## ~(Admit + Dept + Gender)^2
##
## Deviance df Delta(Dev) Delta(df) P(> Delta(Dev)
## Model 1 2097.67121 16
## Model 2 21.73551 6 2075.935706 10 0.00000
## Model 3 20.20429 5 1.531213 1 0.21593
## Saturated 0.00000 0 20.204294 5 0.00114
##################
Need to transform the data to a frequency data.frame
berkeley <- as.data.frame(UCBAdmissions)
berk.glm1 <- glm(Freq ~ Dept * (Gender+Admit), data=berkeley, family="poisson")
summary(berk.glm1)
##
## Call:
## glm(formula = Freq ~ Dept * (Gender + Admit), family = "poisson",
## data = berkeley)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4776 -0.4144 0.0098 0.3089 2.2321
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.27557 0.04248 147.744 < 2e-16 ***
## DeptB -0.40575 0.06770 -5.993 2.06e-09 ***
## DeptC -1.53939 0.08305 -18.536 < 2e-16 ***
## DeptD -1.32234 0.08159 -16.207 < 2e-16 ***
## DeptE -2.40277 0.11014 -21.816 < 2e-16 ***
## DeptF -3.09624 0.15756 -19.652 < 2e-16 ***
## GenderFemale -2.03325 0.10233 -19.870 < 2e-16 ***
## AdmitRejected -0.59346 0.06838 -8.679 < 2e-16 ***
## DeptB:GenderFemale -1.07581 0.22860 -4.706 2.52e-06 ***
## DeptC:GenderFemale 2.63462 0.12343 21.345 < 2e-16 ***
## DeptD:GenderFemale 1.92709 0.12464 15.461 < 2e-16 ***
## DeptE:GenderFemale 2.75479 0.13510 20.391 < 2e-16 ***
## DeptF:GenderFemale 1.94356 0.12683 15.325 < 2e-16 ***
## DeptB:AdmitRejected 0.05059 0.10968 0.461 0.645
## DeptC:AdmitRejected 1.20915 0.09726 12.432 < 2e-16 ***
## DeptD:AdmitRejected 1.25833 0.10152 12.395 < 2e-16 ***
## DeptE:AdmitRejected 1.68296 0.11733 14.343 < 2e-16 ***
## DeptF:AdmitRejected 3.26911 0.16707 19.567 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2650.095 on 23 degrees of freedom
## Residual deviance: 21.736 on 6 degrees of freedom
## AIC: 216.8
##
## Number of Fisher Scoring iterations: 4
note use of formula
to reorder factors in the mosaic
mosaic(berk.glm1, gp=shading_Friendly,
labeling=labeling_residuals,
formula=~Admit+Dept+Gender)
the same, displaying studentized residuals.
mosaic(berk.glm1, residuals_type="rstandard",
labeling=labeling_residuals, shade=TRUE,
formula=~Admit+Dept+Gender,
main="Model: [DeptGender][DeptAdmit]")
## all two-way model
berk.glm2 <- glm(Freq ~ (Dept + Gender + Admit)^2, data=berkeley, family="poisson")
summary(berk.glm2)
##
## Call:
## glm(formula = Freq ~ (Dept + Gender + Admit)^2, family = "poisson",
## data = berkeley)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8 9 10 11
## -0.75481 0.99471 1.96454 -3.15768 -0.03402 0.04449 0.15709 -0.22034 1.01273 -0.73839 -0.74367
## 12 13 14 15 16 17 18 19 20 21 22
## 0.54896 0.06760 -0.04741 -0.06911 0.05080 1.05578 -0.61236 -0.73617 0.42678 -0.20117 0.05113
## 23 24
## 0.19803 -0.05370
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.27150 0.04271 146.855 < 2e-16 ***
## DeptB -0.40322 0.06784 -5.944 2.78e-09 ***
## DeptC -1.57790 0.08949 -17.632 < 2e-16 ***
## DeptD -1.35000 0.08526 -15.834 < 2e-16 ***
## DeptE -2.44982 0.11755 -20.840 < 2e-16 ***
## DeptF -3.13787 0.16174 -19.401 < 2e-16 ***
## GenderFemale -1.99859 0.10593 -18.866 < 2e-16 ***
## AdmitRejected -0.58205 0.06899 -8.436 < 2e-16 ***
## DeptB:GenderFemale -1.07482 0.22861 -4.701 2.58e-06 ***
## DeptC:GenderFemale 2.66513 0.12609 21.137 < 2e-16 ***
## DeptD:GenderFemale 1.95832 0.12734 15.379 < 2e-16 ***
## DeptE:GenderFemale 2.79519 0.13925 20.073 < 2e-16 ***
## DeptF:GenderFemale 2.00232 0.13571 14.754 < 2e-16 ***
## DeptB:AdmitRejected 0.04340 0.10984 0.395 0.693
## DeptC:AdmitRejected 1.26260 0.10663 11.841 < 2e-16 ***
## DeptD:AdmitRejected 1.29461 0.10582 12.234 < 2e-16 ***
## DeptE:AdmitRejected 1.73931 0.12611 13.792 < 2e-16 ***
## DeptF:AdmitRejected 3.30648 0.16998 19.452 < 2e-16 ***
## GenderFemale:AdmitRejected -0.09987 0.08085 -1.235 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2650.095 on 23 degrees of freedom
## Residual deviance: 20.204 on 5 degrees of freedom
## AIC: 217.26
##
## Number of Fisher Scoring iterations: 4
mosaic.glm(berk.glm2, residuals_type="rstandard",
labeling = labeling_residuals, shade=TRUE,
formula=~Admit+Dept+Gender,
main="Model: [DeptGender][DeptAdmit][AdmitGender]")
anova(berk.glm1, berk.glm2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: Freq ~ Dept * (Gender + Admit)
## Model 2: Freq ~ (Dept + Gender + Admit)^2
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 6 21.735
## 2 5 20.204 1 1.5312 0.2159
berkeley <- within(berkeley, dept1AG <- (Dept=='A')*(Gender=='Female')*(Admit=='Admitted'))
berkeley[1:6,]
## Admit Gender Dept Freq dept1AG
## 1 Admitted Male A 512 0
## 2 Rejected Male A 313 0
## 3 Admitted Female A 89 1
## 4 Rejected Female A 19 0
## 5 Admitted Male B 353 0
## 6 Rejected Male B 207 0
berk.glm3 <- glm(Freq ~ Dept * (Gender+Admit) + dept1AG,
data=berkeley, family="poisson")
summary(berk.glm3)
##
## Call:
## glm(formula = Freq ~ Dept * (Gender + Admit) + dept1AG, family = "poisson",
## data = berkeley)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8 9 10 11
## 0.00000 0.00000 0.00000 0.00000 -0.06316 0.08273 0.29514 -0.40088 0.55733 -0.41519 -0.41820
## 12 13 14 15 16 17 18 19 20 21 22
## 0.30511 -0.30655 0.21843 0.32036 -0.23141 0.69837 -0.41419 -0.49916 0.28628 -0.42032 0.10861
## 23 24
## 0.42684 -0.11382
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.23832 0.04419 141.157 < 2e-16 ***
## DeptB -0.36850 0.06879 -5.357 8.47e-08 ***
## DeptC -1.50215 0.08394 -17.895 < 2e-16 ***
## DeptD -1.28509 0.08250 -15.577 < 2e-16 ***
## DeptE -2.36552 0.11081 -21.347 < 2e-16 ***
## DeptF -3.05899 0.15803 -19.357 < 2e-16 ***
## GenderFemale -2.80176 0.23628 -11.858 < 2e-16 ***
## AdmitRejected -0.49212 0.07175 -6.859 6.94e-12 ***
## dept1AG 1.05208 0.26271 4.005 6.21e-05 ***
## DeptB:GenderFemale -0.30730 0.31243 -0.984 0.325
## DeptC:GenderFemale 3.40313 0.24615 13.825 < 2e-16 ***
## DeptD:GenderFemale 2.69560 0.24676 10.924 < 2e-16 ***
## DeptE:GenderFemale 3.52330 0.25220 13.970 < 2e-16 ***
## DeptF:GenderFemale 2.71207 0.24787 10.941 < 2e-16 ***
## DeptB:AdmitRejected -0.05074 0.11181 -0.454 0.650
## DeptC:AdmitRejected 1.10781 0.09966 11.116 < 2e-16 ***
## DeptD:AdmitRejected 1.15699 0.10381 11.145 < 2e-16 ***
## DeptE:AdmitRejected 1.58162 0.11933 13.254 < 2e-16 ***
## DeptF:AdmitRejected 3.16777 0.16848 18.803 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2650.0952 on 23 degrees of freedom
## Residual deviance: 2.6815 on 5 degrees of freedom
## AIC: 199.74
##
## Number of Fisher Scoring iterations: 3
mosaic.glm(berk.glm3, residuals_type="rstandard",
labeling = labeling_residuals, shade=TRUE,
formula=~Admit+Dept+Gender,
main="Model: [DeptGender][DeptAdmit] + DeptA*[GA]")
anova(berk.glm1, berk.glm3, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: Freq ~ Dept * (Gender + Admit)
## Model 2: Freq ~ Dept * (Gender + Admit) + dept1AG
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 6 21.7355
## 2 5 2.6815 1 19.054 1.271e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1