library(car) # for Anova()
library(ggplot2)
library(effects) # effect plots
Load the data
data(UCBAdmissions)
berkeley <- as.data.frame(UCBAdmissions)
Logit model for Dept
berk.logit1 <- glm(Admit=="Admitted" ~ Dept, data=berkeley,
weights=Freq, family="binomial")
Anova(berk.logit1, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 623.03 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(berk.logit1)
##
## Call:
## glm(formula = Admit == "Admitted" ~ Dept, family = "binomial",
## data = berkeley, weights = Freq)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -25.4328 -13.2031 -0.0277 15.9185 21.2218
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.59346 0.06838 8.679 <2e-16 ***
## DeptB -0.05059 0.10968 -0.461 0.645
## DeptC -1.20915 0.09726 -12.432 <2e-16 ***
## DeptD -1.25833 0.10152 -12.395 <2e-16 ***
## DeptE -1.68296 0.11733 -14.343 <2e-16 ***
## DeptF -3.26911 0.16707 -19.567 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6044.3 on 23 degrees of freedom
## Residual deviance: 5189.0 on 18 degrees of freedom
## AIC: 5201
##
## Number of Fisher Scoring iterations: 6
Main effects model
berk.logit2 <- glm(Admit=="Admitted" ~ Dept+Gender,
data=berkeley, weights=Freq, family="binomial")
Anova(berk.logit2, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 534.708 <2e-16 ***
## Gender 1 1.526 0.2167
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(berk.logit2)
##
## Call:
## glm(formula = Admit == "Admitted" ~ Dept + Gender, family = "binomial",
## data = berkeley, weights = Freq)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -25.3424 -13.0584 -0.1631 16.0167 21.3199
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.58205 0.06899 8.436 <2e-16 ***
## DeptB -0.04340 0.10984 -0.395 0.693
## DeptC -1.26260 0.10663 -11.841 <2e-16 ***
## DeptD -1.29461 0.10582 -12.234 <2e-16 ***
## DeptE -1.73931 0.12611 -13.792 <2e-16 ***
## DeptF -3.30648 0.16998 -19.452 <2e-16 ***
## GenderFemale 0.09987 0.08085 1.235 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6044.3 on 23 degrees of freedom
## Residual deviance: 5187.5 on 17 degrees of freedom
## AIC: 5201.5
##
## Number of Fisher Scoring iterations: 6
Plots for logit models
obs <- log(UCBAdmissions[1,,] / UCBAdmissions[2,,])
pred2 <- cbind(berkeley[,1:3], fit=predict(berk.logit2))
pred2 <- cbind(subset(pred2, Admit=="Admitted"), obs=as.vector(obs))
head(pred2)
## Admit Gender Dept fit obs
## 1 Admitted Male A 0.5820514 0.4921214
## 3 Admitted Female A 0.6819215 1.5441974
## 5 Admitted Male B 0.5386535 0.5337493
## 7 Admitted Female B 0.6385236 0.7537718
## 9 Admitted Male C -0.6805466 -0.5355182
## 11 Admitted Female C -0.5806765 -0.6604399
ggplot(pred2, aes(x=Dept, y=fit, group=Gender, color=Gender)) +
geom_line(size=1.4) +
geom_point(aes(y=obs), size=3) +
labs(y="Log odds (Admitted") +
theme_bw(base_size = 16) +
theme(legend.position = c(.75, .80))
Effect plots
berk.eff2 <- allEffects(berk.logit2)
plot main effects
plot(berk.eff2)
# plot 'interaction'
plot(effect('Dept:Gender', berk.logit2), multiline=TRUE)
include a 1 df term for dept A
berkeley <- within(berkeley, dept1AG <- (Dept=='A')*(Gender=='Female'))
berk.logit3 <- glm(Admit=="Admitted" ~ Dept + Gender + dept1AG,
data=berkeley, weights=Freq, family="binomial")
Anova(berk.logit3, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 445.1207 < 2.2e-16 ***
## Gender 1 0.1251 0.7235
## dept1AG 1 15.3167 9.091e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(effect('Dept:Gender', berk.logit3), multiline=TRUE)
obs <- log(UCBAdmissions[1,,] / UCBAdmissions[2,,])
pred3 <- cbind(berkeley[,1:3], fit=predict(berk.logit3))
pred3 <- cbind(subset(pred3, Admit=="Admitted"), obs=as.vector(obs))
head(pred3)
## Admit Gender Dept fit obs
## 1 Admitted Male A 0.4921214 0.4921214
## 3 Admitted Female A 1.5441974 1.5441974
## 5 Admitted Male B 0.5441816 0.5337493
## 7 Admitted Female B 0.5134898 0.7537718
## 9 Admitted Male C -0.5958952 -0.5355182
## 11 Admitted Female C -0.6265870 -0.6604399
ggplot(pred3, aes(x=Dept, y=fit, group=Gender, color=Gender)) +
geom_line(size=1.4) +
geom_point(aes(y=obs), size=3) +
labs(y="Log odds (Admitted") +
theme_bw(base_size = 16) +
theme(legend.position = c(.75, .80))
IycgLS0tDQojJyB0aXRsZTogIlVDQiBkYXRhOiBsb2dpdCBtb2RlbHMgZm9yIGFkbWlzc2lvbiINCiMnIGF1dGhvcjogIk1pY2hhZWwgRnJpZW5kbHkiDQojJyBkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCkpYCINCiMnIG91dHB1dDoNCiMnICAgaHRtbF9kb2N1bWVudDoNCiMnICAgICB0aGVtZTogcmVhZGFibGUNCiMnICAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQojJyAtLS0NCg0KbGlicmFyeShjYXIpICAgICAgICMgZm9yIEFub3ZhKCkNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZWZmZWN0cykgICAjIGVmZmVjdCBwbG90cw0KDQojJyAjIyBMb2FkIHRoZSBkYXRhDQpkYXRhKFVDQkFkbWlzc2lvbnMpDQpiZXJrZWxleSA8LSBhcy5kYXRhLmZyYW1lKFVDQkFkbWlzc2lvbnMpDQoNCiMnICMjIExvZ2l0IG1vZGVsIGZvciBEZXB0DQpiZXJrLmxvZ2l0MSA8LSBnbG0oQWRtaXQ9PSJBZG1pdHRlZCIgfiBEZXB0LCBkYXRhPWJlcmtlbGV5LCANCiAgICAgICAgICAgICAgICAgd2VpZ2h0cz1GcmVxLCBmYW1pbHk9ImJpbm9taWFsIikNCkFub3ZhKGJlcmsubG9naXQxLCB0ZXN0PSJXYWxkIikNCnN1bW1hcnkoYmVyay5sb2dpdDEpDQoNCiMnICMjIE1haW4gZWZmZWN0cyBtb2RlbA0KYmVyay5sb2dpdDIgPC0gZ2xtKEFkbWl0PT0iQWRtaXR0ZWQiIH4gRGVwdCtHZW5kZXIsIA0KICAgICAgICAgICAgICAgICBkYXRhPWJlcmtlbGV5LCB3ZWlnaHRzPUZyZXEsIGZhbWlseT0iYmlub21pYWwiKQ0KQW5vdmEoYmVyay5sb2dpdDIsIHRlc3Q9IldhbGQiKQ0Kc3VtbWFyeShiZXJrLmxvZ2l0MikNCg0KIycgIyMgUGxvdHMgZm9yIGxvZ2l0IG1vZGVscw0Kb2JzIDwtIGxvZyhVQ0JBZG1pc3Npb25zWzEsLF0gLyBVQ0JBZG1pc3Npb25zWzIsLF0pDQpwcmVkMiA8LSBjYmluZChiZXJrZWxleVssMTozXSwgZml0PXByZWRpY3QoYmVyay5sb2dpdDIpKQ0KcHJlZDIgPC0gY2JpbmQoc3Vic2V0KHByZWQyLCBBZG1pdD09IkFkbWl0dGVkIiksIG9icz1hcy52ZWN0b3Iob2JzKSkNCmhlYWQocHJlZDIpDQoNCmdncGxvdChwcmVkMiwgYWVzKHg9RGVwdCwgeT1maXQsIGdyb3VwPUdlbmRlciwgY29sb3I9R2VuZGVyKSkgKw0KICBnZW9tX2xpbmUoc2l6ZT0xLjQpICsNCiAgZ2VvbV9wb2ludChhZXMoeT1vYnMpLCBzaXplPTMpICsNCiAgbGFicyh5PSJMb2cgb2RkcyAoQWRtaXR0ZWQiKSArDQogIHRoZW1lX2J3KGJhc2Vfc2l6ZSA9IDE2KSArDQogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9IGMoLjc1LCAuODApKQ0KDQoNCiMnICMjIEVmZmVjdCBwbG90cw0KYmVyay5lZmYyIDwtIGFsbEVmZmVjdHMoYmVyay5sb2dpdDIpDQoNCiMnIHBsb3QgbWFpbiAgZWZmZWN0cw0KcGxvdChiZXJrLmVmZjIpDQojIHBsb3QgJ2ludGVyYWN0aW9uJw0KcGxvdChlZmZlY3QoJ0RlcHQ6R2VuZGVyJywgYmVyay5sb2dpdDIpLCBtdWx0aWxpbmU9VFJVRSkNCg0KIycgIyMgaW5jbHVkZSBhIDEgZGYgdGVybSBmb3IgZGVwdCBBDQpiZXJrZWxleSA8LSB3aXRoaW4oYmVya2VsZXksIGRlcHQxQUcgPC0gKERlcHQ9PSdBJykqKEdlbmRlcj09J0ZlbWFsZScpKQ0KYmVyay5sb2dpdDMgPC0gZ2xtKEFkbWl0PT0iQWRtaXR0ZWQiIH4gRGVwdCArIEdlbmRlciArIGRlcHQxQUcsIA0KICAgICAgICAgICAgICAgICAgIGRhdGE9YmVya2VsZXksIHdlaWdodHM9RnJlcSwgZmFtaWx5PSJiaW5vbWlhbCIpDQpBbm92YShiZXJrLmxvZ2l0MywgdGVzdD0iV2FsZCIpDQpwbG90KGVmZmVjdCgnRGVwdDpHZW5kZXInLCBiZXJrLmxvZ2l0MyksIG11bHRpbGluZT1UUlVFKQ0KDQpvYnMgPC0gbG9nKFVDQkFkbWlzc2lvbnNbMSwsXSAvIFVDQkFkbWlzc2lvbnNbMiwsXSkNCnByZWQzIDwtIGNiaW5kKGJlcmtlbGV5WywxOjNdLCBmaXQ9cHJlZGljdChiZXJrLmxvZ2l0MykpDQpwcmVkMyA8LSBjYmluZChzdWJzZXQocHJlZDMsIEFkbWl0PT0iQWRtaXR0ZWQiKSwgb2JzPWFzLnZlY3RvcihvYnMpKQ0KaGVhZChwcmVkMykNCg0KZ2dwbG90KHByZWQzLCBhZXMoeD1EZXB0LCB5PWZpdCwgZ3JvdXA9R2VuZGVyLCBjb2xvcj1HZW5kZXIpKSArDQogIGdlb21fbGluZShzaXplPTEuNCkgKw0KICBnZW9tX3BvaW50KGFlcyh5PW9icyksIHNpemU9MykgKw0KICBsYWJzKHk9IkxvZyBvZGRzIChBZG1pdHRlZCIpICsNCiAgdGhlbWVfYncoYmFzZV9zaXplID0gMTYpICsNCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gYyguNzUsIC44MCkpDQoNCg0K