\documentclass[11pt]{article} \usepackage{times,sw,SweaveModified,amsmath} \usepackage[paperheight=20cm,paperwidth=25cm,lmargin=1cm,rmargin=1cm, tmargin=.5cm,bmargin=1.5cm]{geometry} \newcommand{\bx}{\mathbf{x}} \newcommand{\bbeta}{\boldsymbol{\beta}} <>= options(width=100,show.signif.stars=FALSE,digits=4,prompt=" ", continue=" ") @ \begin{document} \section*{Stat 8053, Fall 2013: Logistic Models (Faraway, Chap.~2)} The data for this example is in the package \texttt{alr4}. If you downloaded before August 30, you need to download it again to reproduce this handout. <>= install.packages("alr4") @ In July 1999 a very large storm devastated the tree population of the Boundary Waters Canoe Area Wilderness (BWCAW) in Northern Minnesota. This first example concern one species of trees called black spruce. <<>>= library(alr4) # also loads 'car' and 'effects' packages some(BlowBS) # from the 'car' package required by 'alr4' @ Here \texttt{d} is the diameter of the tree in cm to the nearest 0.5 cm, \texttt{died} is the response $y$, the number of trees that died, and \texttt{m} is the number of trees sampled. <>= plot( I(died/m) ~ d, BlowBS, xlab="Diameter, cm", ylab="Observed blow down frac.", cex=.4*sqrt(m), log="x", ylim=c(0,1)) g1 <- glm(cbind(died, m-died) ~ log(d), data=BlowBS, family=binomial) bs <- round(coef(g1), 4) dnew <- seq(3, 40, length=100) lines(dnew, predict(g1, newdata=data.frame(d=dnew), type="response"), lwd=1.5) grid(col="gray", lty="solid") @ \centerline{\includegraphics[width=3.3in]{logisticreg-fig1.pdf}} <<>>= summary(g1) Anova(g1) @ \subsubsection*{Coefficients} Let $\bx$ be the observed predictors and $\hat{\bbeta}$ the vector of estimates including the intercept, so the fitted log odds are $\bx'\hat{\bbeta}$. Suppose $\bx_1$ differs from $\bx$ by increasing $x_1$ to $x_1+\delta$. Then \begin{align*} \log\left( \frac{p(\bx_1)}{1-p(\bx_1)} \right) &= \bx_1'\hat{\bbeta} \\ \frac{p(\bx_1)}{1-p(\bx_1)} &= \exp(\bx_1'\hat{\bbeta}) \\ &= \exp(\hat{\beta}_0 + \hat{\beta}_1(x_1+\delta) + \cdots \hat{\beta}_px_p)\\ &= \exp(\hat{\beta_1}\delta) \exp(\hat{\beta}_0 + \hat{\beta}_1x_1 + \cdots \hat{\beta}_px_p)\\ &= \exp(\hat{\beta_1}\delta) \left(\frac{p(\bx)}{1-p(\bx)}\right) \end{align*} Example: If \texttt{d} is increased by 10\%, then $\log(1.1\texttt{d}) = \log(\texttt{d}) + \log(1.1) = \log(\texttt{d}) + \delta = \approx \log(\texttt{d}) + 0.095$, and so the odds of blowdown are multiplied by $\exp(\hat{\beta}_1 \times 0.095) = 1.36$. Since $\log(1.25) \approx \Sexpr{round(log(1.25), 3)}$, a 25\% increase in \texttt{d} corresponds to multiplying the odds of blowdown by $\exp(\log(1.25)\hat{\beta}_1 ) = 2.07$. To get a confidence interval, exponentiate the end-points of an interval for $\beta_1$: <<>>= exp(log(1.1) * confint(g1)[2, ]) @ \newpage \subsubsection*{Per Tree} <<>>= str(Blowdown) @ This data file uses each tree as a unit of analysis, rather than grouped by diameter: <<>>= summary(g2 <- glm(y ~ log(d), data=Blowdown, family=binomial, subset=spp=="black spruce")) compareCoefs(g1, g2) # from 'car' Anova(g2) # also from 'car' @ \subsubsection*{A bigger regression} <<>>= summary(g3 <- glm(y ~ log(d) + s + spp, data=Blowdown, family=binomial)) @ This model has an intercept for each of the 9 species, given in logit scale by <<>>= round(ints <- coef(g3)[1] + c(0, coef(g3)[4:11]), 3) @ but a common slope for \texttt{d} and for \texttt{s}. \subsubsection*{Effects} A \textbf{typical fitted value} for each species can be obtained by getting fitted values for each species with \texttt{s} and \texttt{d} fixed at a typical value, by default their means: <<>>= (new <- with(Blowdown, data.frame(spp=levels(spp), d=exp(mean(log(d))), s=mean(s)))) predict(g3, newdata=new) predict(g3, newdata=new, type="response") @ <>= or <- order(ints) Blowdown$spp <- with(Blowdown, factor(spp, levels=levels(spp)[or])) g3 <- update(g3) (e3 <- Effect("spp", g3)) # from the 'effects' package plot(e3, rotx=45, grid=TRUE) @ \centerline{\includegraphics[width=3.5in]{logisticreg-fig2.pdf}} \end{document}