# # ##### Bootstrap function to be used with package "lavaan". ##### # ##### THANKS TO RICH HERRINGTON for writing the function below. ##### # The function only works with CFA models (as below), but in time it # may also be adapted for use with SEM structural models as well # as Growth Curve models. # # Read in the 'boot.lavaan' function / specify the 'boot.lavaan' function. # <<<<<<<<<<<<< BEGIN FUNCTION >>>>>>>>>>>>>> boot.lavaan<- function (data, weights=rep(1/nrow(data),nrow(data)), model, R = 100, N=nrow(data), try.iter=20) # # Note: The value of the calling argument to set the sampling weights is: # # - an un-weighted resampling as the default: # (e.g. weights=rep(1/nrow(data),nrow(data)) # # - a weighted resampling if the "weights" argument is given a column # of weights as a calling argument to the parameter "weights": # (e.g. weights=data$weight) # { refit <- function(data,weights,model) { N<-nrow(data) indices <- sample(N, N, replace = TRUE, prob=weights) sample.data<-data[indices,] S <- cor(sample.data) omega.coef<-omega(sample.data,plot=FALSE)$omega_h refitted.model <-cfa(model.syntax=model, data=sample.data, std.lv = TRUE, meanstructure="default", std.ov=TRUE, se="robust", orthogonal=TRUE, estimator="MLR", group=NULL, group.equal=NULL) lavaan.coefs<-coef(refitted.model) # Only include statistics for bootstrap sampling that have some variation # across subsamples refitted.model.fit<-inspect(refitted.model,"fit")[c(1,4,7:8,11,14:20,22:25,27:34,36)] coeffs<-c(lavaan.coefs,refitted.model.fit,omega.coef) lavaan.names<-names(lavaan.coefs) names(coeffs)<-c(lavaan.names, names(refitted.model.fit),"omega") return(list(coef=coeffs, cor.matrix=S)) } if (!require("psych")) stop("package psych not available") if (!require("boot")) stop("package boot not available") if (!require("sem")) stop("package sem is not available") if (!require("lavaan")) stop("package lavaan not available") if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1) seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE) warn <- options(warn = -2) on.exit(options(warn)) nErrors <- 0 model.fit <-cfa(model.syntax=model, data=data, std.lv = TRUE, meanstructure="default", std.ov=TRUE, se="robust", orthogonal=TRUE, estimator="MLR", group=NULL, group.equal=NULL) orig.sem.fit.1<-inspect(model.fit,"coef")$lambda orig.sem.fit.2<-inspect(model.fit,"se")$lambda orig.sem.fit.3<-inspect(model.fit,"fit") orig.sem.fit.3<-orig.sem.fit.3[c(1,4,7:8,11,14:20,22:25,27:34,36)] orig.sem<-list(orig.sem.fit.1,orig.sem.fit.2,orig.sem.fit.3) orig.sem.fit.names<-names(orig.sem[[3]]) coefficients<-c(coef(model.fit),orig.sem[[3]],omega(data,plot=FALSE)$omega_h) coef.names <- names(coef(model.fit)) names(coefficients)<-c(coef.names,orig.sem.fit.names,"omega") var.names <- names(coefficients) coefs <- matrix(numeric(0), R, length(coefficients)) colnames(coefs)<- c(coef.names,orig.sem.fit.names,"omega") cor.sum<-0 cor.sum.squared.dev<-0 coef.sum<-0 for (b in 1:R) { print(b) print(date()) for (try in 1:try.iter) { if (try > try.iter) stop("exceeded default setting on consecutive convergence failures") res <- try(refit(data,weights,model), silent = FALSE) if (inherits(res, "try-error")) nErrors <- nErrors + 1 else { coefs[b,]<- res$coef coef.sum <- coef.sum + res$coef cor.sum <- cor.sum + res$cor.matrix boot.avg.cor <- cor.sum/b boot.avg.coef<- coef.sum/b squared.dev <- (boot.avg.cor-res$cor.matrix)^2 cor.sum.squared.dev <- cor.sum.squared.dev + squared.dev boot.std.err.cor <- sqrt(cor.sum.squared.dev/b) orig.cor.matrix<-cor(data) bias.cor.matrix<-orig.cor.matrix-boot.avg.cor break() } } } options(warn) if (nErrors > 0) warning("there were", nErrors, "apparent convergence failures;\nthese are discarded from the", R, "bootstrap replications returned") res <- list(t0 = coefficients, t = coefs, R = R, data = data, seed = seed, statistic = refit, sim = "ordinary", stype = "i", call = match.call(), strata = rep(1, N), weights = weights, boot.avg.cor=boot.avg.cor, boot.std.err.cor=boot.std.err.cor, bias.cor.matrix=bias.cor.matrix,boot.avg.coef=boot.avg.coef,orig.sem=orig.sem) class(res) <- c("bootsem", "boot") res } # <<<<<<<<<<<<< END FUNCTION >>>>>>>>>>>>>> ##### Example application of the boot.lavaan function. # Import the data from the web using the 'foreign' function (because the data # is in an SPSS.sav file format. library(foreign) exsem <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module8/SAS_Ex/SEMData.sav", use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE) head(exsem) # Load the "lavaan" package. library(lavaan) # SPECIFY the Measurement model (CFA). m.model.1 <- ' Personality =~ extro + open + agree Engagement =~ social + cognitive + physical + cultural Crystallized =~ vocab + abstruse + block Fluid =~ common + sets + series ' # FITTING the Measurement model. m.model.fit <- cfa(model.syntax=m.model.1, data = exsem, std.lv = TRUE) summary(m.model.fit, fit.measures = TRUE, standardize = TRUE) # Applying the 'boot.lavaan' function to the fitted (measurement) model; # this took about 4 minutes on my work machine (2 gigs of RAM), R = 100 # is the number of bootstrapped samples; it is generally considered best # if one uses a few thousand bootstrapped samples. run.1 <- boot.lavaan(data = exsem, model = m.model.1, weights=rep(1/nrow(exsem),nrow(exsem)), R = 100, try.iter = 20) run.1 ##### Identify / extract elements of the output (which are not shown by default). names(run.1) # "t0" is the original sample estimate run.1$t0 # "t" contains the boostrap statistics for each boostrap sample run.1$t # Original model fit run.1$orig.sem # Bootstrap average of the correlations run.1$boot.avg.cor # Bootstrap standard errors of the correlations run.1$boot.std.err.cor # Bias estimates of the correlations run.1$bias.cor.matrix # Column names (data vectors) of available bootstrap statistics colnames(run.1$t) # Graphing the bootstrap distributions of several sample statistics run.1$t[,"Fluid=~sets"] par(mfrow=c(3,4)) hist(run.1$t[,"Fluid=~sets"]) qqnorm(run.1$t[,"Fluid=~sets"]) hist(run.1$t[,"rmsea"]) qqnorm(run.1$t[,"rmsea"]) hist(run.1$t[,"aic"]) qqnorm(run.1$t[,"aic"]) hist(run.1$t[,"bic"]) qqnorm(run.1$t[,"bic"]) hist(run.1$t[,"srmr"]) qqnorm(run.1$t[,"srmr"]) hist(run.1$t[,"omega"]) qqnorm(run.1$t[,"omega"]) # End; Mar. 3, 2011.