library( arm )
library( ggplot2 )
library( plyr )
# Install package from textbook to get the data by
# running this line once.
#install.packages( "faraway" )
40 Code for Faraway Example
This handout gives the code for the longitudinal data example from Faraway book chapter 9 (see iPac on Canvas). See that chapter to get explanations, etc., or just run code line by line to see what you get! Note: this code uses ggplot. Book uses another plotting package called lattice
; don’t bother with lattice
.
40.1 R Setup
40.2 First Example
# load the data
library(faraway)
data(psid)
head(psid)
age educ sex income year person
1 31 12 M 6000 68 1
2 31 12 M 5300 69 1
3 31 12 M 5200 70 1
4 31 12 M 6900 71 1
5 31 12 M 7500 72 1
6 31 12 M 8000 73 1
# Make log-transform of income
$log_income = with( psid, log( income + 100 ) )
psid
# Look at some plots
= subset( psid, person < 21 )
psid.sub ggplot( data=psid.sub, aes( x=year, y=income ) ) +
facet_wrap( ~ person ) +
geom_line()
ggplot( data=psid.sub, aes( x=year, y=log_income, group=person ) ) +
facet_wrap( ~ sex ) +
geom_line()
# Simple regression on a single person
<- lm( log_income ~ I(year-78), subset=(person==1), psid)
lmod coef(lmod)
(Intercept) I(year - 78)
9.40910950 0.08342068
# Now do linear regression on everyone
= ddply( psid, .(person), function( dat ) {
sum.stat <- lm(log(income) ~ I(year-78), data=dat )
lmod = coef(lmod)
cc names(cc) = c("intercept","slope")
c( cc, sex=dat$sex[[1]] )
} )head( sum.stat )
person intercept slope sex
1 1 9.399957 0.08426670 2
2 2 9.819091 0.08281031 2
3 3 7.893863 0.03131149 1
4 4 7.853027 0.07585135 1
5 5 8.033453 -0.04738677 1
6 6 9.673443 0.08953380 2
plot( slope ~ intercept, data=sum.stat, xlab="Intercept",ylab="Slope")
boxplot( slope ~ sex, data=sum.stat )
# Is rate of income growth different by sex?
t.test( slope ~ sex, data=sum.stat )
Welch Two Sample t-test
data: slope by sex
t = 2.3786, df = 56.736, p-value = 0.02077
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
0.00507729 0.05916871
sample estimates:
mean in group 1 mean in group 2
0.08903346 0.05691046
# Is initial income different by sex?
t.test( intercept ~ sex, data=sum.stat )
Welch Two Sample t-test
data: intercept by sex
t = -8.2199, df = 79.719, p-value = 3.065e-12
alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
95 percent confidence interval:
-1.4322218 -0.8738792
sample estimates:
mean in group 1 mean in group 2
8.229275 9.382325
40.3 Fitting the model
# Fitting our model
library(lme4)
$cyear <- psid$year-78
psid<- lmer(log(income) ~ cyear*sex + age + educ + (cyear|person), psid)
mmod display(mmod)
lmer(formula = log(income) ~ cyear * sex + age + educ + (cyear |
person), data = psid)
coef.est coef.se
(Intercept) 6.67 0.54
cyear 0.09 0.01
sexM 1.15 0.12
age 0.01 0.01
educ 0.10 0.02
cyear:sexM -0.03 0.01
Error terms:
Groups Name Std.Dev. Corr
person (Intercept) 0.53
cyear 0.05 0.19
Residual 0.68
---
number of obs: 1661, groups: person, 85
AIC = 3839.8, DIC = 3751.2
deviance = 3785.5
# refit with the lmerTest library to get p-values
library( lmerTest )
<- lmer(log(income) ~ cyear*sex + age + educ + (cyear|person), psid)
mmod summary(mmod)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: log(income) ~ cyear * sex + age + educ + (cyear | person)
Data: psid
REML criterion at convergence: 3819.8
Scaled residuals:
Min 1Q Median 3Q Max
-10.2310 -0.2134 0.0795 0.4147 2.8254
Random effects:
Groups Name Variance Std.Dev. Corr
person (Intercept) 0.2817 0.53071
cyear 0.0024 0.04899 0.19
Residual 0.4673 0.68357
Number of obs: 1661, groups: person, 85
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 6.674211 0.543323 81.176972 12.284 < 2e-16 ***
cyear 0.085312 0.008999 78.915123 9.480 1.14e-14 ***
sexM 1.150312 0.121292 81.772542 9.484 8.06e-15 ***
age 0.010932 0.013524 80.837434 0.808 0.4213
educ 0.104209 0.021437 80.722319 4.861 5.65e-06 ***
cyear:sexM -0.026306 0.012238 77.995359 -2.150 0.0347 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) cyear sexM age educ
cyear 0.020
sexM -0.104 -0.098
age -0.874 0.002 -0.026
educ -0.597 0.000 0.008 0.167
cyear:sexM -0.003 -0.735 0.156 -0.010 -0.011
40.4 Model Diagnostics
# First add our residuals and fitted values to our original data
# (We can do this since we have no missing data so the rows will line up
# correctly)
= transform( psid, resid=resid( mmod ),
psid fit = fitted( mmod ) )
head( psid )
age educ sex income year person log_income cyear resid fit
1 31 12 M 6000 68 1 8.716044 -10 0.06719915 8.632316
2 31 12 M 5300 69 1 8.594154 -9 -0.13201639 8.707478
3 31 12 M 5200 70 1 8.575462 -8 -0.22622748 8.782641
4 31 12 M 6900 71 1 8.853665 -7 -0.01852759 8.857804
5 31 12 M 7500 72 1 8.935904 -6 -0.01030887 8.932967
6 31 12 M 8000 73 1 8.999619 -5 -0.02093325 9.008130
# Here is a qqplot for each sex
ggplot( data=psid ) +
facet_wrap( ~ sex ) +
stat_qq( aes( sample=resid ) )
# If you want to add the lines, you have to do a little more work
= ddply( psid, .(sex), function( dat ) {
slopes <- quantile(dat$resid, c(0.25, 0.75))
y <- qnorm(c(0.25, 0.75))
x <- as.numeric( diff(y)/diff(x) )
slope <- y[[1]] - slope * x[[1]]
int c( slope=slope, int=int )
} ) slopes
sex slope int
1 F 0.4324568 0.10579138
2 M 0.2473357 0.03321435
ggplot( data=psid ) +
facet_wrap( ~ sex ) +
stat_qq( aes( sample=resid ) ) +
geom_abline( data=slopes, aes( slope=slope, intercept=int ) )
And a residual plot
$educ_levels = cut(psid$educ, c(0,8.5,12.5,20), labels=c( "Less than HS", "HS", "Beyond HS" ) )
psidggplot( data=psid, aes( x=fit, y=resid ) ) +
facet_wrap( ~ educ_levels ) +
geom_point()
40.4.1 Lattice code
For reference, we can also do this:
# This is doing it from the lattice package
library( lattice )
qqmath(~resid(mmod) | sex, psid)
# fancier with some lines. The points should lie on the line
# if we have normal residuals. (We don't.)
qqmath(~ resid(mmod) | sex, data = psid,
panel = function(x, ...) {
panel.qqmathline(x, ...)
panel.qqmath(x, ...)
})