39 Code for HSB Example in Chapter 4 of R&B
This script builds everything from Chapter 4 of Raudenbush and Bryk in R. It is a very useful script for getting pretty much all the code you would need for a conventional multilevel analysis. The code is divided by each table or plot from the chapter.
39.1 R Setup
library(foreign)   # this lets us read in SPSS files
library(tidyverse) # this is a broad package that allows us to do lots of data management-y things (and ggplot!)
library(lme4)      # this allows us to run MLM
library(arm)       # this allows us to display MLM
library(lmerTest)  # this puts p-values on the summary() command for fixed effects
39.2 Load HS&B data
# Read student data
stud.dat = read.spss( "data/hsb1.sav", to.data.frame=TRUE )

# Read in school data
sch.dat = read.spss( "data/hsb2.sav", to.data.frame=TRUE )

# Make single data frame with all variables, keeping all students even if they
# don't match to a school
dat = merge( stud.dat, sch.dat, by="id", all.x=TRUE )
39.3 Table 4.1 Descriptive summaries
## Get mean and SD of the Level 1 variables, rounded to 2 decimal places
# math achievement
round(mean(dat$mathach),2)
[1] 12.75
round(sd(dat$mathach),2)
[1] 6.88
# ses
round(mean(dat$ses),2)
[1] 0
round(sd(dat$ses),2)
[1] 0.78
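As a quick aside, the same Level 1 summaries can be computed in one call with dplyr's across(). This is just a sketch of an alternative, not the book's approach:

# one-call version of the Level 1 panel of Table 4.1
dat %>%
    summarise( across( c( mathach, ses ),
                       list( mean = ~ round( mean(.x), 2 ),
                             sd   = ~ round( sd(.x), 2 ) ) ) )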
## Get mean and SD of Level 2 variables, round to 2 decimal places
# NOTE: we are getting these from the SCHOOL-LEVEL FILE
# sector
round(mean(sch.dat$sector),2) # this answers "what percent of schools are catholic?"
[1] 0.44
round(sd(sch.dat$sector),2)
[1] 0.5
# mean ses
round(mean(sch.dat$meanses),2) # this answers "what is the average of the school-average SES values?"
[1] 0
round(sd(sch.dat$meanses),2)
[1] 0.41
# NOTE: if we used the student-level or "dat" file, we would be answering the
# following questions:
# * what percent of students attend a catholic school?
# * what is the average student ses? <- this would match what we calculated
# ourselves if we had the entire school in our sample
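For comparison, here is a quick sketch of those student-level versions, computed from the merged student file:

# percent of students attending a catholic school
round( mean(dat$sector), 2 )
# average student ses (matches the Level 1 panel above)
round( mean(dat$ses), 2 )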
39.4 Table 4.2: One-Way ANOVA (i.e. uncontrolled random intercept)
## Fit the model described
mod4.2 <- lmer(mathach ~ 1 + (1|id), data=dat)

# Peek at the results
display(mod4.2)
lmer(formula = mathach ~ 1 + (1 | id), data = dat)
coef.est coef.se
12.64 0.24
Error terms:
Groups Name Std.Dev.
id (Intercept) 2.93
Residual 6.26
---
number of obs: 7185, groups: id, 160
AIC = 47122.8, DIC = 47114.8
deviance = 47115.8
## Extract the fixed effect coefficient (and its standard error)
fixef(mod4.2) # extracts the fixed effect coefficient(s)
(Intercept)
12.63697
se.coef(mod4.2)$fixef #extracts the standard errors for the fixed effect(s)
[1] 0.2443936
## Extract the variance components
# Note: in the model display, we see the SDs, not the variance
VarCorr(mod4.2)
Groups Name Std.Dev.
id (Intercept) 2.9350
Residual 6.2569
# To get the variances, we extract each part and square it
# variance of random intercept
(sigma.hat(mod4.2)$sigma$id)^2
(Intercept)
8.614025
# variance of level 1 residual (easier to extract)
sigma(mod4.2)^2
[1] 39.14832
# could also use the more complicated formula that we used with the intercept.
# If we do, we get the same thing
sigma.hat(mod4.2)$sigma$data^2
[1] 39.14832
# Inference on the need for a random intercept
# This uses the book's way of calculating a test statistic with a
# chi-squared distribution.
schools = dat %>% group_by( id ) %>%
    summarise( nj = n(),
               Y.bar.j = mean( mathach ) )

gamma.00 = fixef( mod4.2 )[[1]]
sigma.2 = sigma(mod4.2)^2
H = sum( schools$nj * (schools$Y.bar.j - gamma.00)^2 / sigma.2 )
H
[1] 1660.232
# our p-value
pchisq( H, df = nrow( schools ) - 1, lower.tail = FALSE )
[1] 4.770612e-248
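As a cross-check (not the book's method), lmerTest's ranova() gives a likelihood ratio test for dropping the random intercept; the test statistic differs from the chi-squared approach above but should lead to the same substantive conclusion here:

# LRT for removing the random intercept (single-term deletion)
ranova( mod4.2 )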
# calculating the ICC
tau.00 = VarCorr(mod4.2)$id[1,1]
rho.hat = tau.00 / (tau.00 + sigma.2 )
rho.hat
[1] 0.1803518
# Calculating reliability for each school mean. (Here it is purely a function of
# students in the school. More students, more info, and thus more reliable.)
sigma.2 = sigma(mod4.2)^2
tau.00 = VarCorr(mod4.2)$id[1,1]
lambda = tau.00 / ( tau.00 + sigma.2 / schools$nj )
mean( lambda )
[1] 0.9013773
# A bonus graph of the reliabilities
qplot( lambda )
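Note that qplot() is deprecated in ggplot2 3.4.0 and later; an equivalent histogram in current syntax would be:

ggplot( data.frame( lambda ), aes( lambda ) ) +
    geom_histogram()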
39.5 Table 4.3 Means as Outcomes Model
# (i.e. random intercept with Level 2 predictor)
## Fit the model described
mod4.3 <- lmer(mathach ~ 1 + meanses + (1|id), data=dat)
# Peek at the results
display(mod4.3)
lmer(formula = mathach ~ 1 + meanses + (1 | id), data = dat)
coef.est coef.se
(Intercept) 12.65 0.15
meanses 5.86 0.36
Error terms:
Groups Name Std.Dev.
id (Intercept) 1.62
Residual 6.26
---
number of obs: 7185, groups: id, 160
AIC = 46969.3, DIC = 46956.9
deviance = 46959.1
## Extract the fixed effect coefficients (and standard errors/t-statistics)
fixef(mod4.3) # extracts the fixed effect coefficients
(Intercept) meanses
12.649435 5.863538
# NOTE: you can call them separately by "indexing" them
# just the intercept
fixef(mod4.3)[1]
(Intercept)
12.64944
# just coefficient on mean ses
fixef(mod4.3)[2]
meanses
5.863538
se.coef(mod4.3)$fixef #extracts the standard errors for the fixed effect(s)
[1] 0.1492801 0.3614580
## Calculate (or extract) the t-ratio (aka the t-statistic)
# NOTE: the authors don't present this for the intercept, because we often
# don't care. But it is presented here for completeness.
# tstats for intercept
fixef(mod4.3)[1]/se.coef(mod4.3)$fixef[1]
(Intercept)
84.73622
# tstat mean ses
fixef(mod4.3)[2]/se.coef(mod4.3)$fixef[2]
meanses
16.22191
# tstat extracted - this does both variables at once!
coef(summary(mod4.3))[,"t value"]
(Intercept) meanses
84.73622 16.22191
# NOTE: Let's look at what is happening here:
coef(summary(mod4.3)) # gives us all the fixed effect statistics we could want
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 12.649435 0.1492801 153.7425 84.73622 6.032590e-131
meanses 5.863538 0.3614580 153.4067 16.22191 4.267894e-35
# The [ ] is called "indexing": it is a way of subsetting data by telling R
# which [rows, columns] you want to see. Here we are telling R that we want
# ALL rows ("[ ,") but only the column labeled "t value".
## Extract the variance components
# Note: in the model display, we see the SDs, not the variance
VarCorr(mod4.3)
Groups Name Std.Dev.
id (Intercept) 1.6244
Residual 6.2576
# To get the variances, we extract each part and square it
# variance of random intercept
(sigma.hat(mod4.3)$sigma$id)^2
(Intercept)
2.638708
# variance of level 1 residual
sigma(mod4.3)^2
[1] 39.15708
# Range of plausible values for school means for schools with mean SES of 0
# (see pages 73-74):
fixef( mod4.3 )[[1]] + c(-1.96, 1.96) * (sigma.hat(mod4.3)$sigma$id)
[1] 9.465592 15.833279
# Compare to our model without mean ses
fixef( mod4.2 )[[1]] + c(-1.96, 1.96) * (sigma.hat(mod4.2)$sigma$id)
[1] 6.884441 18.389507
# Proportion reduction in variance or "variance explained" at level 2
tau.00.anova = (sigma.hat(mod4.2)$sigma$id)^2
tau.00.meanses = (sigma.hat(mod4.3)$sigma$id)^2
(tau.00.anova - tau.00.meanses) / tau.00.anova
(Intercept)
0.693673
## Inference on the random effects
schools = merge( schools, sch.dat, by="id" )
gamma.00 = fixef( mod4.3 )[[1]]
gamma.01 = fixef( mod4.3 )[[2]]
schools = mutate( schools, resid = Y.bar.j - gamma.00 - gamma.01*meanses )
H = sum( schools$nj * schools$resid^2 ) / sigma(mod4.3)^2
H
[1] 633.5175
pchisq( H, nrow( schools ) - 2, lower.tail = FALSE )
[1] 3.617696e-58
## Reliability revisited (from pg 75)
mod4.3
Linear mixed model fit by REML ['lmerModLmerTest']
Formula: mathach ~ 1 + meanses + (1 | id)
Data: dat
REML criterion at convergence: 46961.28
Random effects:
Groups Name Std.Dev.
id (Intercept) 1.624
Residual 6.258
Number of obs: 7185, groups: id, 160
Fixed Effects:
(Intercept) meanses
12.649 5.864
u.hat = coef( mod4.3 )$id
head( u.hat )
(Intercept) meanses
1224 12.32688 5.863538
1288 12.71898 5.863538
1296 10.70101 5.863538
1308 12.92208 5.863538
1317 11.48086 5.863538
1358 11.73878 5.863538
sigma.2 = sigma(mod4.3)^2
tau.00 = VarCorr(mod4.3)$id[1,1]
sigma.2
[1] 39.15708
tau.00
[1] 2.638708
# These are the individual reliabilities: how well we can separate schools
# with the same mean SES.
# (So it is _conditional_ on the mean SES of the schools.)
lambda.j = tau.00 / (tau.00 + (sigma.2 / schools$nj))
mean( lambda.j )
[1] 0.7400747
39.6 Table 4.4 Random coefficient model (i.e. random slope)
# group-mean center ses
dat <- dat %>% group_by( id ) %>%
    mutate( ses_grpcenter = ses - mean(ses) )
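A quick sanity check on the centering (a sketch): the school means of the group-centered variable should all be (numerically) zero:

# check: within-school means of ses_grpcenter are all ~0
dat %>%
    group_by( id ) %>%
    summarise( m = mean( ses_grpcenter ) ) %>%
    summary()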
## Fit the model described
mod4.4 <- lmer(mathach ~ 1 + ses_grpcenter + ( 1 + ses_grpcenter | id ), data=dat)

# Peek at the results
display(mod4.4)
lmer(formula = mathach ~ 1 + ses_grpcenter + (1 + ses_grpcenter |
id), data = dat)
coef.est coef.se
(Intercept) 12.64 0.24
ses_grpcenter 2.19 0.13
Error terms:
Groups Name Std.Dev. Corr
id (Intercept) 2.95
ses_grpcenter 0.83 0.02
Residual 6.06
---
number of obs: 7185, groups: id, 160
AIC = 46726.2, DIC = 46707.7
deviance = 46711.0
## Extract the fixed effect coefficients (and standard errors/t-statistics)
coef(summary(mod4.4)) #this reproduces the whole first panel, though methods used above also work
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 12.636193 0.2445047 156.7512 51.68077 2.286893e-100
ses_grpcenter 2.193196 0.1282589 155.2166 17.09976 1.582355e-37
## Extract the variance components
# Note: in the model display, we see the SDs, not the variance
VarCorr(mod4.4)
Groups Name Std.Dev. Corr
id (Intercept) 2.94636
ses_grpcenter 0.83307 0.019
Residual 6.05807
# variance of random effects
(sigma.hat(mod4.4)$sigma$id)^2
(Intercept) ses_grpcenter
8.6810437 0.6939974
# NOTE: to extract one or the other, you can use indexing
(sigma.hat(mod4.4)$sigma$id[1])^2 # this is just the intercept random effect
(Intercept)
8.681044
# variance of level 1 residual
sigma(mod4.4)^2
[1] 36.70019
39.7 Table 4.5 Intercepts and Slopes as Outcomes Model
## Fit the model described
mod4.5 <- lmer(mathach ~ 1 + meanses + sector + ses_grpcenter*(meanses + sector) + ( 1 + ses_grpcenter | id ), data=dat)
# NOTE: The code above allows the coefficients to appear in the same order as in Table 4.5
# R automatically includes the main effects, so this model can be written more
# concisely as shown below:
#
# lmer(mathach ~ 1 + ses_grpcenter*(meanses + sector) + ( 1 + ses_grpcenter | id ), data=dat)
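If you want to convince yourself the two specifications are the same model, a quick check (a sketch; mod4.5b is just a throwaway name) is to fit the concise version and compare the log-likelihoods:

# the concise specification gives an identical fit, just with the
# coefficients listed in a different order
mod4.5b <- lmer(mathach ~ 1 + ses_grpcenter*(meanses + sector) + ( 1 + ses_grpcenter | id ), data=dat)
logLik( mod4.5 )
logLik( mod4.5b )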
# Peek at the results
display(mod4.5)
lmer(formula = mathach ~ 1 + meanses + sector + ses_grpcenter *
(meanses + sector) + (1 + ses_grpcenter | id), data = dat)
coef.est coef.se
(Intercept) 12.10 0.20
meanses 5.33 0.37
sector 1.23 0.31
ses_grpcenter 2.94 0.16
meanses:ses_grpcenter 1.04 0.30
sector:ses_grpcenter -1.64 0.24
Error terms:
Groups Name Std.Dev. Corr
id (Intercept) 1.54
ses_grpcenter 0.32 0.39
Residual 6.06
---
number of obs: 7185, groups: id, 160
AIC = 46523.7, DIC = 46489.2
deviance = 46496.4
## Extract the fixed effect coefficients (and standard errors/t-statistics)
#this reproduces the whole first panel, though methods used above also work
coef(summary(mod4.5))
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 12.095997 0.1987329 159.9143 60.865590 1.625101e-112
meanses 5.332898 0.3691567 150.9836 14.446161 2.944282e-30
sector 1.226453 0.3062674 149.6139 4.004518 9.756638e-05
ses_grpcenter 2.938785 0.1550889 139.2934 18.949039 2.197507e-40
meanses:ses_grpcenter 1.038918 0.2988941 160.5428 3.475873 6.550388e-04
sector:ses_grpcenter -1.642619 0.2397854 143.3351 -6.850371 2.009493e-10
# NOTE: there is a slight discrepancy, relative to the book, in the estimate for
# meanses:ses_grpcenter and the t-statistics for meanses:ses_grpcenter and
# sector:ses_grpcenter; nothing that changes the interpretations, however.
# Testing the need for sector (see page 82)
# (We use a likelihood ratio test with the anova() function)
mod4.5.null <- lmer(mathach ~ 1 + meanses + ses_grpcenter*(meanses) + ( 1 + ses_grpcenter | id ), data=dat)
anova( mod4.5, mod4.5.null )
Data: dat
Models:
mod4.5.null: mathach ~ 1 + meanses + ses_grpcenter * (meanses) + (1 + ses_grpcenter | id)
mod4.5: mathach ~ 1 + meanses + sector + ses_grpcenter * (meanses + sector) + (1 + ses_grpcenter | id)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
mod4.5.null 8 46568 46623 -23276 46552
mod4.5 10 46516 46585 -23248 46496 55.941 2 7.122e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Testing the need for random slope (see page 84)
# (We use a likelihood ratio test with the anova() function)
mod4.5.null.slope <- lmer(mathach ~ 1 + meanses + sector + ses_grpcenter*(meanses + sector) + ( 1 | id ), data=dat)
anova( mod4.5, mod4.5.null.slope )
Data: dat
Models:
mod4.5.null.slope: mathach ~ 1 + meanses + sector + ses_grpcenter * (meanses + sector) + (1 | id)
mod4.5: mathach ~ 1 + meanses + sector + ses_grpcenter * (meanses + sector) + (1 + ses_grpcenter | id)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
mod4.5.null.slope 8 46513 46568 -23249 46497
mod4.5 10 46516 46585 -23248 46496 1.0039 2 0.6054
39.8 Figure 4.1
NOTE: Figure 4.1 is a graphical display using the results from Model/Table 4.5
The solid line represents the slope of the gamma-01 coefficient; this is the same in public and catholic schools. The dotted lines represent the slopes for individual schools with "prototypical" values of meanses (-1, 0, and 1 standard deviations from the mean).
# to calculate this, we should note a few values:
avg_meanses  <- mean(dat$meanses)                    # average of mean ses var
high_meanses <- mean(dat$meanses) + sd(dat$meanses)  # 1 sd above avg meanses
low_meanses  <- mean(dat$meanses) - sd(dat$meanses)  # 1 sd below avg meanses

fake.students = expand.grid( id = -1,
                             meanses = c( low_meanses, avg_meanses, high_meanses ),
                             sector = c( 0, 1 ),
                             ses_grpcenter = c( -1, 0, 1 ) )
fake.students = mutate( fake.students, ses = meanses + ses_grpcenter )
fake.students$mathach = predict( mod4.5, newdata=fake.students, allow.new.levels = TRUE )
fake.schools = filter( fake.students, ses_grpcenter == 0 )
ggplot( fake.students, aes( ses, mathach ) ) +
facet_wrap( ~ sector ) +
geom_line( aes( group=meanses ), lty = 2 ) +
geom_line( data=fake.schools, aes( x = ses, y = mathach ) ) +
geom_point( data=fake.schools, aes( x = ses, y = mathach ) )
39.9 Set-up for remaining tables/figures of chapter
In order to create Table 4.6 and the following two graphs, we will need to prepare a new dataset. These next lines of code do that.
## Start with school level data frame and keep variables interesting to our model comparison
mod.comp <- dplyr::select( sch.dat, id, meanses, sector )

## Add in number of observations per school
n_j <- dat %>% group_by( id ) %>%
    dplyr::summarise( n_j = n() )

mod.comp <- merge( mod.comp, n_j, by="id" )
head( mod.comp )
id meanses sector n_j
1 1224 -0.428 0 47
2 1288 0.128 0 25
3 1296 -0.420 0 48
4 1308 0.534 1 20
5 1317 0.351 1 48
6 1358 -0.014 0 30
## Run site-specific OLS for each school and save estimates
# Calculate global (not group) centered ses
dat$ses_centered <- dat$ses - mean(dat$ses)
# This is the "for loop" method of generating an estimate for each of many small
# worlds (schools). See lecture 2.3 code for the "tidyverse" way.
est.ols <- matrix(nrow=160, ncol=2)  # create a matrix to store estimates
se.ols  <- matrix(nrow=160, ncol=2)  # create a matrix to store standard errors

for (i in 1:length(unique(dat$id))) {  # looping across the 160 different values of id
    id <- unique(dat$id)[i]            # pick the value of id we want
    mod <- lm(mathach ~ 1 + ses_grpcenter, data=dat[dat$id==id,])  # run the model on students in that 1 school
    est.ols[i,] <- coef( mod )         # save the estimates in the matrix we created
    se.ols[i,]  <- se.coef( mod )      # and the SEs
}
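For reference, here is a sketch of the "tidyverse" way mentioned above, using dplyr's group_modify() (ols.tidy is a hypothetical name; the rest of the script does not use it):

# tidyverse version: one row of OLS estimates and SEs per school
ols.tidy <- dat %>%
    group_by( id ) %>%
    group_modify( ~ {
        mod <- lm( mathach ~ 1 + ses_grpcenter, data = .x )
        data.frame( b0_ols = coef(mod)[[1]], b1_ols = coef(mod)[[2]],
                    se_b0_ols = se.coef(mod)[[1]], se_b1_ols = se.coef(mod)[[2]] )
    } )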
#convert the matrix to a dataframe and attach the schoolid info
est.ols <- as.data.frame(est.ols)
est.ols$id <- sch.dat$id
names(est.ols) <- c( 'b0_ols', 'b1_ols', 'id' )

# store standard errors for later
se.ols <- as.data.frame(se.ols)
se.ols$id <- sch.dat$id
names(se.ols) <- c( 'se_b0_ols', 'se_b1_ols', 'id' )

mod.comp <- merge(mod.comp, est.ols, by='id')
mod.comp <- merge(mod.comp, se.ols, by='id' )
head( mod.comp )
id meanses sector n_j b0_ols b1_ols se_b0_ols se_b1_ols
1 1224 -0.428 0 47 9.715447 2.5085817 1.0954478 1.765216
2 1288 0.128 0 25 13.510800 3.2554487 1.3637656 2.079675
3 1296 -0.420 0 48 7.635958 1.0759591 0.7740752 1.209016
4 1308 0.534 1 20 16.255500 0.1260242 1.4045813 3.003437
5 1317 0.351 1 48 13.177688 1.2739128 0.7902486 1.435942
6 1358 -0.014 0 30 11.206233 5.0680087 0.8994345 1.391550
# We are done running OLS on each of our schools and storing the results.
## Extract site-specific coefficients from "unconditional model" (model 4.4)
est4.4 <- coef(mod4.4)$id
names(est4.4) <- c('b0_uncond', 'b1_uncond')  # rename
est4.4$id = rownames( est4.4 )
## Extract site-specific coefficients from the "conditional model" (model 4.5)
est4.5 <- coef(mod4.5)$id
head( est4.5 )
(Intercept) meanses sector ses_grpcenter meanses:ses_grpcenter
1224 12.02263 5.332898 1.226453 2.933689 1.038918
1288 12.55180 5.332898 1.226453 2.979174 1.038918
1296 10.38509 5.332898 1.226453 2.744066 1.038918
1308 12.12710 5.332898 1.226453 2.923822 1.038918
1317 10.56530 5.332898 1.226453 2.806582 1.038918
1358 11.60500 5.332898 1.226453 2.961265 1.038918
sector:ses_grpcenter
1224 -1.642619
1288 -1.642619
1296 -1.642619
1308 -1.642619
1317 -1.642619
1358 -1.642619
est4.5$id = rownames( est4.5 )
# Now we need to calculate the point estimates using our individual regression equations
# including our level-2 values for each school
# (This is a bit of a pain.)
est4.5 = merge( est4.5, mod.comp, by="id", suffixes = c( "", ".v" ) )
est4head( est4.5 )
id (Intercept) meanses sector ses_grpcenter meanses:ses_grpcenter
1 1224 12.02263 5.332898 1.226453 2.933689 1.038918
2 1288 12.55180 5.332898 1.226453 2.979174 1.038918
3 1296 10.38509 5.332898 1.226453 2.744066 1.038918
4 1308 12.12710 5.332898 1.226453 2.923822 1.038918
5 1317 10.56530 5.332898 1.226453 2.806582 1.038918
6 1358 11.60500 5.332898 1.226453 2.961265 1.038918
sector:ses_grpcenter meanses.v sector.v n_j b0_ols b1_ols se_b0_ols
1 -1.642619 -0.428 0 47 9.715447 2.5085817 1.0954478
2 -1.642619 0.128 0 25 13.510800 3.2554487 1.3637656
3 -1.642619 -0.420 0 48 7.635958 1.0759591 0.7740752
4 -1.642619 0.534 1 20 16.255500 0.1260242 1.4045813
5 -1.642619 0.351 1 48 13.177688 1.2739128 0.7902486
6 -1.642619 -0.014 0 30 11.206233 5.0680087 0.8994345
se_b1_ols
1 1.765216
2 2.079675
3 1.209016
4 3.003437
5 1.435942
6 1.391550
est4.5 = mutate( est4.5,
                 b0_cond = `(Intercept)` + sector * sector.v + meanses * meanses.v,
                 b1_cond = ses_grpcenter + `sector:ses_grpcenter` * sector.v + `meanses:ses_grpcenter` * meanses.v )
est4.5 = dplyr::select( est4.5, id, b0_cond, b1_cond )
## Combine the MLM estimates into 1 dataset with ids
est.mlm <- merge( est4.4, est4.5, by="id" )

# Merge all the estimates together by school id
mod.comp <- merge( mod.comp, est.mlm, by='id', all=TRUE )
head( mod.comp )
id meanses sector n_j b0_ols b1_ols se_b0_ols se_b1_ols b0_uncond
1 1224 -0.428 0 47 9.715447 2.5085817 1.0954478 1.765216 9.956953
2 1288 0.128 0 25 13.510800 3.2554487 1.3637656 2.079675 13.386036
3 1296 -0.420 0 48 7.635958 1.0759591 0.7740752 1.209016 8.039091
4 1308 0.534 1 20 16.255500 0.1260242 1.4045813 3.003437 15.622073
5 1317 0.351 1 48 13.177688 1.2739128 0.7902486 1.435942 13.132771
6 1358 -0.014 0 30 11.206233 5.0680087 0.8994345 1.391550 11.387452
b1_uncond b0_cond b1_cond
1 2.262837 9.740146 2.489033
2 2.375964 13.234409 3.112156
3 1.872247 8.145275 2.307720
4 2.050193 16.201317 1.835985
5 1.997129 13.663596 1.528623
6 2.738390 11.530341 2.946721
39.10 Table 4.6 Comparing site-specific estimates from different models
## Create the list of rows that R&B include in the table on p. 87
keeprows <- c(4, 15, 17, 22, 27, 53, 69, 75, 81, 90, 135, 153)
## Limit data to the rows of interest, and print the columns in Table 4.6 in the correct order
tab4.6 <- mod.comp[keeprows, c('b0_ols','b1_ols','b0_uncond','b1_uncond','b0_cond','b1_cond','n_j','meanses','sector') ]
## Print Table 4.6 -- the Empirical Bayes from conditional model (b0_cond, b1_cond) are waaaaaay off
round(tab4.6,2)
b0_ols b1_ols b0_uncond b1_uncond b0_cond b1_cond n_j meanses sector
4 16.26 0.13 15.62 2.05 16.20 1.84 20 0.53 1
15 15.98 2.15 15.74 2.19 16.01 1.84 53 0.52 1
17 18.11 0.09 17.41 1.95 17.25 3.71 29 0.69 0
22 11.14 -0.78 11.22 1.15 10.89 0.63 67 -0.62 1
27 13.40 4.10 13.32 2.54 12.95 3.00 38 -0.06 0
53 9.52 3.74 9.76 2.75 9.37 2.42 51 -0.64 0
69 11.47 6.18 11.64 2.72 11.92 3.03 25 0.08 0
75 9.06 1.65 9.28 2.01 9.30 0.67 63 -0.59 1
81 15.42 5.26 15.25 3.14 15.53 1.91 66 0.43 1
90 12.14 1.97 12.18 2.14 12.34 3.03 50 0.19 0
135 4.55 0.25 6.42 1.92 8.55 2.63 14 0.03 0
153 10.28 0.76 10.71 2.06 9.67 2.37 19 -0.59 0
39.11 Figure 4.2 : Scatter plots of the estimates from 2 unconstrained models
## Panel (a) and Panel (b) are plotted on the same graph
ggplot(data=mod.comp,aes()) +
geom_point(aes(x=b1_ols,y=b0_ols),color='black',alpha=0.7) +
geom_point(aes(x=b1_uncond,y=b0_uncond),color='blue',alpha=0.7) +
labs(title="Black=OLS; Blue=Unconditional EB") +
xlim(-5,8) + ylim(2,20)
39.12 Figure 4.3 : Scatter plots of residuals from the OLS & Constrained MLM model
## Luke: Equations 4.27a and 4.27b (p. 92) are allegedly how we calculate the intercept and slope residuals
## But I'm not sure where the estimates for the gamma-hat terms come from; the OLS model only includes
## individual-level ses
# trying it here with the predictions from conditional EB
fes = fixef( mod4.5 )
fes
(Intercept) meanses sector
12.095997 5.332898 1.226453
ses_grpcenter meanses:ses_grpcenter sector:ses_grpcenter
2.938785 1.038918 -1.642619
mod.comp = mutate( mod.comp,
                   u0_ols = b0_ols - (fes[1] + fes[2]*meanses + fes[3]*sector),
                   u1_ols = b1_ols - (fes[4] + fes[5]*meanses + fes[6]*sector) )
## Panel (a) and (b) plotted on same graph
mod.comp = mutate( mod.comp,
                   u0_cond = b0_cond - (fes[1] + fes[2]*meanses + fes[3]*sector),
                   u1_cond = b1_cond - (fes[4] + fes[5]*meanses + fes[6]*sector) )
head( mod.comp )
id meanses sector n_j b0_ols b1_ols se_b0_ols se_b1_ols b0_uncond
1 1224 -0.428 0 47 9.715447 2.5085817 1.0954478 1.765216 9.956953
2 1288 0.128 0 25 13.510800 3.2554487 1.3637656 2.079675 13.386036
3 1296 -0.420 0 48 7.635958 1.0759591 0.7740752 1.209016 8.039091
4 1308 0.534 1 20 16.255500 0.1260242 1.4045813 3.003437 15.622073
5 1317 0.351 1 48 13.177688 1.2739128 0.7902486 1.435942 13.132771
6 1358 -0.014 0 30 11.206233 5.0680087 0.8994345 1.391550 11.387452
b1_uncond b0_cond b1_cond u0_ols u1_ols u0_cond u1_cond
1 2.262837 9.740146 2.489033 -0.09807014 0.01445354 -0.07337107 -0.005095579
2 2.375964 13.234409 3.112156 0.73219201 0.18368221 0.45580075 0.040389349
3 1.872247 8.145275 2.307720 -2.22022179 -1.42648036 -1.71090544 -0.194719195
4 2.050193 16.201317 1.835985 0.08528223 -1.72492410 0.03109939 -0.014963432
5 1.997129 13.663596 1.528623 -2.01661001 -0.38691354 -1.53070178 -0.132203129
6 2.738390 11.530341 2.946721 -0.81510320 2.14376861 -0.49099553 0.022480378
nrow( mod.comp )
[1] 160
ggplot(data=mod.comp, aes( pch=as.factor(sector)) ) +
geom_point(aes(x=u1_ols, y=u0_ols),color='black', alpha=0.7) +
geom_point(aes(x=u1_cond, y=u0_cond),color='blue', alpha=0.7) +
labs(title = "Black: OLS, Blue: Conditional EB") +
xlim(-6,6) + ylim(-8,8)
# To get in two-panel format we need to get our data to long format
mod.comp.ols = data.frame( sector = mod.comp$sector,
                           u0 = mod.comp$u0_ols,
                           u1 = mod.comp$u1_ols )
mod.comp.EB  = data.frame( sector = mod.comp$sector,
                           u0 = mod.comp$u0_cond,
                           u1 = mod.comp$u1_cond )
mod.comp.l = bind_rows( ols = mod.comp.ols, cond = mod.comp.EB, .id = "method" )
ggplot(data=mod.comp.l, aes( u1, u0, pch=as.factor(sector)) ) +
facet_wrap( ~ method ) +
geom_point()
39.13 Table 4.7 : pg 94
# This section is not very good--I would skip.
# Generating confidence intervals for individual random intercepts and slopes is a weird business.
# OLS First:
# Doing it by fitting OLS on our subset
sch.2305 = filter( dat, id == 2305 )
head( sch.2305 )
# A tibble: 6 × 13
# Groups: id [1]
id minority female ses mathach size sector pracad disclim himinty
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2305 1 1 -0.738 16.4 485 1 0.69 -1.38 1
2 2305 1 1 -1.18 12.8 485 1 0.69 -1.38 1
3 2305 1 1 -0.308 15.3 485 1 0.69 -1.38 1
4 2305 1 1 -0.358 12.7 485 1 0.69 -1.38 1
5 2305 1 1 -1.52 10.2 485 1 0.69 -1.38 1
6 2305 1 1 -0.518 8.94 485 1 0.69 -1.38 1
# ℹ 3 more variables: meanses <dbl>, ses_grpcenter <dbl>, ses_centered <dbl>
M.2305 = lm( mathach ~ ses_grpcenter, data=sch.2305 )
M.2305
Call:
lm(formula = mathach ~ ses_grpcenter, data = sch.2305)
Coefficients:
(Intercept) ses_grpcenter
11.1378 -0.7821
confint( M.2305 )
2.5 % 97.5 %
(Intercept) 9.911824 12.363698
ses_grpcenter -2.665989 1.101767
sch.8367 = filter( dat, id == 8367 )
head( sch.8367 )
# A tibble: 6 × 13
# Groups: id [1]
id minority female ses mathach size sector pracad disclim himinty
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 8367 0 0 -0.228 15.9 153 0 0 1.79 0
2 8367 0 1 -0.208 1.86 153 0 0 1.79 0
3 8367 1 0 0.532 -2.83 153 0 0 1.79 0
4 8367 0 1 0.662 2.12 153 0 0 1.79 0
5 8367 0 0 -0.228 6.76 153 0 0 1.79 0
6 8367 0 0 -1.08 0.725 153 0 0 1.79 0
# ℹ 3 more variables: meanses <dbl>, ses_grpcenter <dbl>, ses_centered <dbl>
M.8367 = lm( mathach ~ ses_grpcenter, data=sch.8367 )
M.8367
Call:
lm(formula = mathach ~ ses_grpcenter, data = sch.8367)
Coefficients:
(Intercept) ses_grpcenter
4.5528 0.2504
confint( M.8367 )
2.5 % 97.5 %
(Intercept) 1.872974 7.232598
ses_grpcenter -3.431096 3.931845
# Use SE from earlier to get confint
est4.7 <- mod.comp[c(22,135),]
est4.7
id meanses sector n_j b0_ols b1_ols se_b0_ols se_b1_ols b0_uncond
22 2305 -0.622 1 67 11.137761 -0.7821112 0.6138468 0.943289 11.222551
135 8367 0.032 0 14 4.552786 0.2503748 1.2299413 1.689668 6.423938
b1_uncond b0_cond b1_cond u0_ols u1_ols u0_cond u1_cond
22 1.149555 10.886600 0.6276417 1.132373 -1.432070 0.8812116 -0.02231763
135 1.924903 8.549007 2.6307047 -7.713864 -2.721656 -3.7176433 -0.34132569
# CI for intercept and slope using our normal and stored SEs.
# (Not taking t distribution into account changes things, as does not
# taking the uncertainty in the fixed effects for the EB CIs. So this is
# very approximate.)
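For the OLS intervals, the t correction would use the appropriate quantile instead of 1.96; a sketch of the critical value for school 2305 (n_j = 67 students, 2 estimated coefficients):

# t critical value that confint() used for school 2305's OLS fit
qt( 0.975, df = 67 - 2 )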
se_uncond = as.data.frame( se.coef(mod4.4)$id )
head( se_uncond )
(Intercept) ses_grpcenter
1224 0.8464092 0.7189592
1288 1.1205609 0.7593421
1296 0.8382669 0.7111100
1308 1.2307722 0.8005012
1317 0.8382675 0.7377054
1358 1.0354852 0.7489241
names( se_uncond ) = c("se_b0_uncond","se_b1_uncond" )
se_cond = as.data.frame( se.coef( mod4.5 )$id )
names( se_cond ) = c("se_b0_cond", "se_b1_cond")
head( se_cond )
se_b0_cond se_b1_cond
1224 0.7662313 0.2929654
1288 0.9521965 0.2988437
1296 0.7601221 0.2923332
1308 1.0176181 0.3025414
1317 0.7603073 0.2940970
1358 0.8982481 0.2971694
se_uncond$id = rownames( se_uncond )
se_cond$id = rownames( se_cond )
est4.7 = merge( est4.7, se_uncond, by="id" )
est4.7 = merge( est4.7, se_cond, by="id" )
est4.7.int = mutate( est4.7,
                     CI.low.ols = b0_ols - 1.96 * se_b0_ols,
                     CI.high.ols = b0_ols + 1.96 * se_b0_ols,
                     CI.low.uncond = b0_uncond - 1.96 * se_b0_uncond,
                     CI.high.uncond = b0_uncond + 1.96 * se_b0_uncond,
                     CI.low.cond = b0_cond - 1.96 * se_b0_cond,
                     CI.high.cond = b0_cond + 1.96 * se_b0_cond )
dplyr::select( est4.7.int, starts_with("CI") )
CI.low.ols CI.high.ols CI.low.uncond CI.high.uncond CI.low.cond CI.high.cond
1 9.934621 12.340901 9.815648 12.629455 9.579800 12.19340
2 2.142101 6.963471 3.642797 9.205078 6.361492 10.73652
est4.7.slope = mutate( est4.7,
                       CI.low.ols = b1_ols - 1.96 * se_b1_ols,
                       CI.high.ols = b1_ols + 1.96 * se_b1_ols,
                       CI.low.uncond = b1_uncond - 1.96 * se_b1_uncond,
                       CI.high.uncond = b1_uncond + 1.96 * se_b1_uncond,
                       CI.low.cond = b1_cond - 1.96 * se_b1_cond,
                       CI.high.cond = b1_cond + 1.96 * se_b1_cond )
dplyr::select( est4.7.slope, starts_with("CI") )
CI.low.ols CI.high.ols CI.low.uncond CI.high.uncond CI.low.cond CI.high.cond
1 -2.630958 1.066735 -0.1675367 2.466647 0.0629627 1.192321
2 -3.061375 3.562124 0.3960110 3.453795 2.0356645 3.225745
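A final aside: a more simulation-based route (not in the book) is arm's sim(), which draws from the approximate posterior of the fitted model; quantiles of the simulated random effects would then give interval estimates that partially account for uncertainty in the fixed effects. A sketch, assuming the @ranef slot holds the simulated random effects organized by grouping factor:

# simulate 1000 draws from the fitted unconditional model
s4.4 <- sim( mod4.4, n.sims = 1000 )
str( s4.4@ranef )  # inspect the structure of the simulated random effects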