#
#
### Brief Introduction to LINEAR MIXED MODELS in R ###
#
# Also known as: Fixed and Random Effects Models, Nested effects Models, Mixed Effect Models,
# and Hierarchical Linear Modeling.
#
# Import the example data from the web, naming the resulting data frame 'lmm.data'.
lmm.data <- read.table(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module9/lmm.data.txt",
  header = TRUE, sep = ",", na.strings = "NA",
  dec = ".", strip.white = TRUE
)
# Inspect the data: variable summaries, the first few rows, and the number of cases.
summary(lmm.data)
head(lmm.data)
nrow(lmm.data)
##### function lm #####
# Traditional OLS linear regression ('lm') can be used to evaluate a series of simple
# models (each with multiple predictors / covariates), often called sequential or
# hierarchical regression. The 'anova' function then tests one model against another,
# i.e., whether the R-squared change (difference) between them is significant.
lm.1 <- lm(extro ~ open + social,
           data = lmm.data)
summary(lm.1)
lm.2 <- lm(extro ~ open + agree + social,
           data = lmm.data)
summary(lm.2)
anova(lm.1, lm.2)
# However, traditional OLS linear regression cannot handle nested (random) effects, such
# as the models above when scores are located at level 1 (classrooms) nested within
# level 2 (schools). The next two linear models demonstrate this. Notice in the output
# that the (categorical) factors are automatically dummy coded, as is necessary for them
# to serve as predictors in OLS regression -- but their nested structure is not specified.
lm.3 <- lm(extro ~ open + social + class + school,
           data = lmm.data)
summary(lm.3)
lm.4 <- lm(extro ~ open + agree + social + class + school,
           data = lmm.data)
summary(lm.4)
anova(lm.3, lm.4)
# In the next two models, the 'class:school' notation specifies an interaction term (the
# interaction of class and school), which is quite a different situation from one with
# random effects (e.g., nested variables). Both outputs show 'NA' wherever an interaction
# term is redundant with one listed above it (there are 4 classes and 6 schools).
lm.5 <- lm(extro ~ open + social + class:school,
           data = lmm.data)
summary(lm.5)
lm.6 <- lm(extro ~ open + agree + social + class:school,
           data = lmm.data)
summary(lm.6)
anova(lm.5, lm.6)
##### Function glm #####
# Even the more flexible Generalized Linear Model ('glm') function cannot handle nested
# effects, although it can handle some types of random effects (e.g., repeated measures
# designs / data, not covered here). The primary benefit of 'glm' is the ability to
# specify non-Gaussian distributions (a Gaussian distribution is a normal distribution).
# The 'glm' output also reports the Akaike Information Criterion (AIC), which can be used
# to compare models and is much preferred over R-squared or even adjusted R-squared:
# lower AIC indicates a better fitting model, so an AIC of -22.45 indicates a better
# fitting model than one with an AIC of 14.25.
glm.1 <- glm(extro ~ open + social + class + school,
             data = lmm.data)
summary(glm.1)
# The following models specify an interaction between 'class' and 'school', which does
# not specify nesting. Naturally, the two models below mirror the regression results of
# models 'lm.5' and 'lm.6' above.
glm.2 <- glm(extro ~ open + social + class:school,
             data = lmm.data)
summary(glm.2)
glm.3 <- glm(extro ~ open + agree + social + class:school,
             data = lmm.data)
summary(glm.3)
##### Package 'lme4' function 'lmer' #####
# In order to adequately test these nested (random) effects, we must turn to another type
# of modeling function/package. There are several ways to go about this, only one of which
# is mentioned here (lmer).
# Load the necessary library for model fitting.
library(lme4)
# The Linear Mixed Effects (lme4) package [which offers the 'lmer' function, below] is
# designed to fit linear mixed models; its companions 'glmer' and 'nlmer' fit generalized
# linear and nonlinear mixed models. Below, we fit linear mixed effects models with fixed
# effects for open & social (or open, agree, & social), as well as random/nested effects
# for class within school, to predict scores on the outcome variable, extroversion (extro).
# Note in the output we can use the Bayesian Information Criterion (BIC) to compare models,
# which is similar to, but more conservative than (and thus preferred over), the AIC
# mentioned previously. Like AIC, lower BIC reflects better model fit (i.e., a BIC of
# -22.634 is better than a BIC of 5.657). The 'lmer' function uses REstricted Maximum
# Likelihood (REML) to estimate the variance components, which is preferred over standard
# Maximum Likelihood (also available, via the optional argument REML = FALSE).
# NOTE (fix): older versions of 'lme4' accepted a 'family' argument in 'lmer' (e.g.,
# family = "gaussian"); current versions do not -- 'lmer' always fits a Gaussian model,
# and non-Gaussian models (e.g., family = binomial) must be fit with 'glmer' instead.
# Note below, class is nested within school: class is 'under' school. Random effects are
# specified inside parentheses and can be repeated measures, interaction terms, or nested
# (as is the case here). Simple interactions simply use the colon separator:
# (1|school:class).
lmm.1 <- lmer(extro ~ open + social + class + (1|school/class), data = lmm.data)
summary(lmm.1)
lmm.2 <- lmer(extro ~ open + agree + social + class + (1|school/class), data = lmm.data)
summary(lmm.2)
# To extract the estimates of the fixed effects parameters.
fixef(lmm.2)
# To extract the (conditional modes of the) random effects.
ranef(lmm.2)
# To extract the coefficients for each level of the random effect grouping factors
# ('class:school' and 'school'; with 4 classes and 6 schools -- see above -- the nested
# 'class:school' factor presumably has 24 levels; verify against the output).
coef(lmm.2)
coef(lmm.2)$'class:school'
# To extract the predicted values (based on the fitted model).
yhat <- fitted(lmm.2)
summary(yhat)
# To extract the residuals (errors); summarize, as well as plot them.
# (Note: this deliberately keeps the original object name 'residuals', which masks the
# base 'residuals()' function for the remainder of the session.)
residuals <- resid(lmm.2)
summary(residuals)
hist(residuals)
### One other thing worth taking a look at is the Intra Class Correlation (ICC).
# First, run the 'null' model (which includes just the intercept and the random effect for
# the highest level of the nesting variables; in this example, 'school').
lmm.null <- lmer(extro ~ 1 + (1|school), data = lmm.data)
summary(lmm.null)
# The ICC is the 'school' variance component divided by the total variance (the sum of all
# the variance component estimates). Rather than copying the estimates out of the summary
# by hand (the original script hard-coded 95.8720 + 7.1399 = 103.0119 and
# 95.8720 / 103.0119), extract them from the fitted model with 'VarCorr' so the
# computation stays correct if the data or model change.
vc <- as.data.frame(VarCorr(lmm.null))
total.var <- sum(vc$vcov)
icc <- vc$vcov[vc$grp == "school"] / total.var
icc
# This indicates that about 93.07% of the variance in 'extro' can be "explained" by school
# group membership (verified below using Bliese's multilevel package).
# ICC1 and ICC2 as described by Bliese.
library(multilevel)
aov.1 <- aov(extro ~ school, lmm.data)
summary(aov.1)
# Below (ICC1) likewise indicates that about 93.07% of the variance in 'extro' can be
# "explained" by school group membership.
ICC1(aov.1)
# The ICC2 value (below) of .9996 indicates that school groups can be very reliably
# differentiated in terms of 'extro' scores.
ICC2(aov.1)
detach("package:multilevel")
#### Parameter uncertainty intervals (formerly: MCMC methods).
# NOTE (fix): the original version of this section created a posterior distribution
# (n = 5000) with 'mcmcsamp' (Markov Chain Monte Carlo methods) and then computed Highest
# Posterior Density (HPD) intervals with 'HPDinterval'. The 'mcmcsamp' function was removed
# from the 'lme4' package (it is defunct as of lme4 1.0), so that code now fails with an
# error. The modern replacements in 'lme4' are profile likelihood and parametric bootstrap
# confidence intervals, both available through the 'confint' method for fitted models.
# Profile likelihood 95% confidence intervals for all parameters. The variance components
# are listed first, on the standard deviation scale (e.g., '.sig01'), followed by the
# fixed effects.
confint(lmm.2, level = 0.95)
# Parametric bootstrap 95% confidence intervals (based on 'nsim' simulated refits of the
# model). These are the closest analogue to the HPD intervals that the old
# 'mcmcsamp' + 'HPDinterval' code produced; note they may take a little while to run.
confint(lmm.2, method = "boot", nsim = 500, level = 0.95)
#####################################################################################################################
#####################################################################################################################
# NOTE: These results will not match up with the same procedures on the same data, run in SPSS (Linear Mixed Models)
# or run in SAS (PROC MIXED); although SPSS and SAS match one another. It has been discovered that the discrepancy
# is due to different reference coding of the categorical variables when in SPSS and SAS compared to the 'lme4'
# package and 'lmer' function. Essentially, all the R functions used here (in this script) code categorical
# factors / variables so that the reference category is the category with the LOWEST numerical value (or
# alphabetically first letter). SPSS and SAS both use the opposite strategy; they code
# categorical factors / variables so that the reference category is the category with the HIGHEST numerical
# value (or alphabetically last letter). This is important to note because the SPSS/SAS
# Mixed Effects model output produces an intercept term for the fixed effects which is substantially different
# from the intercept term for the fixed effects produced by the 'lme4' package; and of course, with different
# intercepts come different predicted values based on the model. If interested in getting SPSS or SAS output
# to match what is produced by this script, then simply reverse code the values of the categorical variables
# when the data is imported to SPSS or SAS. Meaning, for instance with the class variable: any case with a value
# of "a" would be changed to a value of "d" and vice versa, and any case with a value of "c" would be changed to a
# value of "b" and vice versa.
# Unload the 'lme4' package now that the mixed-model portion of the script is complete.
detach("package:lme4")
####################################################################################
######## Additional Considerations ########
###### CENTERING ######
# In many situations in social science, the predictor variables we deal with do not have a meaningful (or true) zero
# point. For instance, a person is not going to have an intelligence test score of zero. In these
# situations it is common to do some type of Centering. Grand mean centering is much more common than Group
# mean centering because, Group mean centering can change the estimation, fit, and interpretation of hierarchical
# linear models. Grand mean centering results in equivalent models as would be the case if raw scores were used.
# Using either method, the mean of the predictor variable becomes zero after subtracting the Grand mean
# or Group mean from each score. Grand mean centering predictor variables at level one often makes interpretation
# easier and can decrease multicollinearity.
######### REFERENCES & RESOURCES #########
# Akaike, H. (1974). A new look at the statistical model identification. I.E.E.E. Transactions on Automatic Control, AC 19, 716 – 723.
# Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Akaike_1974.pdf
# Bartko, J. J. (1976). On various intraclass correlation reliability coefficients. Psychological Bulletin, 83, 762-765.
# http://www.unt.edu/rss/class/Jon/MiscDocs/Bartko_1976.pdf
# Bates, D., & Maechler, M. (2010). Package ‘lme4’. Reference manual for the package, available at:
# http://cran.r-project.org/web/packages/lme4/lme4.pdf
# Bates, D. (2010). Linear mixed model implementation in lme4. Package lme4 vignette, available at:
# http://cran.r-project.org/web/packages/lme4/vignettes/Implementation.pdf
# Bates, D. (2010). Computational methods for mixed models. Package lme4 vignette, available at:
# http://cran.r-project.org/web/packages/lme4/vignettes/Theory.pdf
# Bates, D. (2010). Penalized least squares versus generalized least squares representations of linear mixed models. Package lme4
# vignette, available at:
# http://cran.r-project.org/web/packages/lme4/vignettes/PLSvGLS.pdf
# Bliese, P. (2009). Multilevel modeling in R: A brief introduction to R, the multilevel package and the nlme package. Available at:
# http://cran.r-project.org/doc/contrib/Bliese_Multilevel.pdf
# Draper, D. (1995). Inference and hierarchical modeling in the social sciences. Journal of Educational and Behavioral Statistics, 20(2),
# 115 - 147. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Draper_1995.pdf
# Fox, J. (2002). Linear mixed models: An appendix to “An R and S-PLUS companion to applied regression”. Available at:
# http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-mixed-models.pdf
# Gelman, A. (2005). Analysis of variance -- why it is more important than ever. The Annals of Statistics, 33(1), 1 -- 53. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Gelman_2005.pdf
# Hofmann, D. A., Griffin, M. A., & Gavin, M. B. (2000). The application of hierarchical linear modeling to organizational research.
# In K. J. Klein (Ed.), Multilevel theory, research, and methods in organizations: Foundations, extensions, and new directions (p. 467 - 511).
# San Francisco, CA: Jossey-Bass. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Hofmann_2000.pdf
# Raudenbush, S. W. (1995). Reexamining, reaffirming, and improving application of hierarchical models. Journal of Educational and Behavioral
# Statistics, 20(2), 210 - 220. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Raudenbush_1995.pdf
# Raudenbush, S. W. (1993). Hierarchical linear models and experimental design. In L. Edwards (Ed.), Applied analysis of variance in behavioral
# science (p. 459 - 496). New York: Marcel Dekker. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Raudenbush_1993.pdf
# Rogosa, D., & Saner, H. (1995). Longitudinal data analysis examples with random coefficient models. Journal of Educational and Behavioral
# Statistics, 20(2), 149 - 170. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Rogosa_1995.pdf
# Schwarz, G. (1978). Estimating the dimension of a model. Annals of Statistics, 6, 461 – 464. Available at:
# http://www.unt.edu/rss/class/Jon/MiscDocs/Schwarz_1978.pdf
#### End: Nov. 2010.