# Polytomous Data: nested dichotomies library(car) # for data and Anova() data(Womenlf) some(Womenlf)
## partic hincome children region ## 1 not.work 15 present Ontario ## 7 not.work 15 present Ontario ## 14 not.work 9 present Prairie ## 15 not.work 45 present Atlantic ## 104 not.work 15 absent BC ## 136 fulltime 11 present Ontario ## 197 fulltime 5 present Prairie ## 204 not.work 28 present Quebec ## 229 parttime 23 present Quebec ## 239 not.work 13 present Quebec
# create dichotomies Womenlf <- within(Womenlf, { working <- recode(partic, " 'not.work' = 'no'; else = 'yes' ") fulltime <- recode(partic, " 'fulltime' = 'yes'; 'parttime' = 'no'; 'not.work' = NA") }) some(Womenlf)
## partic hincome children region fulltime working ## 2 not.work 13 present Ontariono ## 4 not.work 23 present Ontario no ## 10 not.work 23 present Ontario no ## 79 not.work 19 present Prairie no ## 116 not.work 13 present Prairie no ## 126 parttime 13 present Ontario no yes ## 137 parttime 14 present Ontario no yes ## 156 not.work 23 present BC no ## 159 fulltime 22 absent Ontario yes yes ## 184 fulltime 18 absent Ontario yes yes
# fit models for each dichotomy Womenlf <- within(Womenlf, contrasts(children) <- "contr.treatment") mod.working <- glm(working ~ hincome + children, family = binomial, data = Womenlf) summary(mod.working)
## ## Call: ## glm(formula = working ~ hincome + children, family = binomial, ## data = Womenlf) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -1.677 -0.865 -0.777 0.929 1.997 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 1.3358 0.3838 3.48 0.0005 *** ## hincome -0.0423 0.0198 -2.14 0.0324 * ## childrenpresent -1.5756 0.2923 -5.39 7e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 356.15 on 262 degrees of freedom ## Residual deviance: 319.73 on 260 degrees of freedom ## AIC: 325.7 ## ## Number of Fisher Scoring iterations: 4
mod.fulltime <- glm(fulltime ~ hincome + children, family = binomial, data = Womenlf) summary(mod.fulltime)
## ## Call: ## glm(formula = fulltime ~ hincome + children, family = binomial, ## data = Womenlf) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.405 -0.868 0.395 0.621 1.764 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 3.4778 0.7671 4.53 5.8e-06 *** ## hincome -0.1073 0.0392 -2.74 0.0061 ** ## childrenpresent -2.6515 0.5411 -4.90 9.6e-07 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 144.34 on 107 degrees of freedom ## Residual deviance: 104.49 on 105 degrees of freedom ## (155 observations deleted due to missingness) ## AIC: 110.5 ## ## Number of Fisher Scoring iterations: 5
Anova(mod.working)
## Analysis of Deviance Table (Type II tests) ## ## Response: working ## LR Chisq Df Pr(>Chisq) ## hincome 4.83 1 0.028 * ## children 31.32 1 2.2e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(mod.fulltime)
## Analysis of Deviance Table (Type II tests) ## ## Response: fulltime ## LR Chisq Df Pr(>Chisq) ## hincome 9.0 1 0.0027 ** ## children 32.1 1 1.4e-08 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
attach(Womenlf) # get fitted values for both sub-models predictors <- expand.grid(hincome = 1:45, children = c("absent", "present")) p.work <- predict(mod.working, predictors, type = "response") p.fulltime <- predict(mod.fulltime, predictors, type = "response") # calculate unconditional probs for the three response categories p.full <- p.work * p.fulltime p.part <- p.work * (1 - p.fulltime) p.not <- 1 - p.work ## NB: the plot looks best if the plot window is made wider than tall op <- par(mfrow = c(1, 2)) # children absent panel plot(c(1, 45), c(0, 1), type = "n", xlab = "Husband's Income", ylab = "Fitted Probability", main = "Children absent") lines(1:45, p.not[1:45], lty = 1, lwd = 3, col = "black") lines(1:45, p.part[1:45], lty = 2, lwd = 3, col = "blue") lines(1:45, p.full[1:45], lty = 3, lwd = 3, col = "red") legend(5, 0.97, lty = 1:3, lwd = 3, col = c("black", "blue", "red"), legend = c("not working", "part-time", "full-time")) # children present panel plot(c(1, 45), c(0, 1), type = "n", xlab = "Husband's Income", ylab = "Fitted Probability", main = "Children present") lines(1:45, p.not[46:90], lty = 1, lwd = 3, col = "black") lines(1:45, p.part[46:90], lty = 2, lwd = 3, col = "blue") lines(1:45, p.full[46:90], lty = 3, lwd = 3, col = "red")
par(op) # a more general way to make the plot op <- par(mfrow = c(1, 2)) plotdata <- data.frame(predictors, p.full, p.part, p.not) Hinc <- 1:max(hincome) for (kids in c("absent", "present")) { data <- subset(plotdata, children == kids) plot(range(hincome), c(0, 1), type = "n", xlab = "Husband's Income", ylab = "Fitted Probability", main = paste("Children", kids)) lines(Hinc, data$p.not, lwd = 3, col = "black", lty = 1) lines(Hinc, data$p.part, lwd = 3, col = "blue", lty = 2) lines(Hinc, data$p.full, lwd = 3, col = "red", lty = 3) if (kids == "absent") { legend(5, 0.97, lty = 1:3, lwd = 3, col = c("black", "blue", "red"), legend = c("not working", "part-time", "full-time")) } }
par(op) detach(Womenlf)
R version 2.15.1 (2012-06-22)
using the R package knitr
(version 0.8.4
) on
Wed Sep 26 09:01:53 2012
.