#
#
############### Discriminant Function Analysis ###############
#
# This script assumes you have worked through all the previous notes from
# the web page and you have downloaded, installed, and updated all available
# R packages.
# Load the example data.
# Read the comma-separated example data set from the course web site.
data.DFA <- read.table(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module6/DFA_df.txt",
  header = TRUE, sep = ",", na.strings = "NA", dec = ".",
  strip.white = TRUE
)
summary(data.DFA)
# The outcome variable y is read as numeric; convert it to a factor so it
# is treated as a grouping variable in the analyses below.
data.DFA$y <- factor(data.DFA$y)
summary(data.DFA)
# Linear Discriminant Analysis with Jackknifed Prediction
library(MASS)
# Model 1: standard linear discriminant analysis fit (CV = FALSE).
# Returns an "lda" object (group means, scaling coefficients, etc.)
# that can be used with predict().
# Note: per ?lda, na.action expects a function, so pass na.omit unquoted
# rather than the string "na.omit".
model.1.DFA <- lda(y ~ x1 + x2 + x3, data = data.DFA,
                   na.action = na.omit, CV = FALSE)
model.1.DFA
# Model 2: leave-one-out cross-validated (jackknifed) predictions
# (CV = TRUE). This returns a list with $class and $posterior instead of
# a fitted "lda" object.
model.2.DFA <- lda(y ~ x1 + x2 + x3, data = data.DFA,
                   na.action = na.omit, CV = TRUE)
model.2.DFA
# Assess the accuracy of the cross-validated predictions. Rows of the
# table are the observed groups of y; columns are the predicted groups.
percent.correct <- table(data.DFA$y, model.2.DFA$class)
# Percent correct within each category of y (diagonal of the row-wise
# proportion table).
diag(prop.table(percent.correct, margin = 1))
# Overall percent correct: sum of the diagonal of the full proportion table.
sum(diag(prop.table(percent.correct)))
# predict() on an "lda" fit returns three elements: $class (predicted
# group), $posterior (posterior probabilities), and $x (the discriminant
# function scores for each row on each function).
predict(model.1.DFA)
# Keep only the scores on the first discriminant function and assign
# them to an object ("scores").
scores <- predict(model.1.DFA)$x[, 1]
scores
# Descriptive statistics of the function scores by group: the Centroids
# (group means) and the standard deviations. tapply() aggregates by the
# actual factor levels of y, so the results are correct even when the
# rows are not sorted by group (the original hard-coded row ranges
# 1:197, 198:396, 397:600 assumed sorted data with group sizes
# 197, 199, 204).
summary(data.DFA$y)
tapply(scores, data.DFA$y, mean)
tapply(scores, data.DFA$y, sd)
# To continue analyzing the Centroids, combine the grouping factor and
# the scores into a data frame. Reference the outcome column by name
# (data.DFA$y) rather than by position (data.DFA[, 2]), which is fragile
# if the column order of the input file ever changes; the intent -- per
# the "group" column name -- is the outcome factor y.
scores.df <- data.frame(group = data.DFA$y, scores = scores)
rm(scores)
head(scores.df, 25)
# A one-way Analysis of Variance on the function scores; two types of
# post-hoc testing follow.
summary(aov(scores ~ group, data = scores.df))
# Bonferroni-adjusted pairwise t tests (pooled SD, independent samples).
pairwise.t.test(scores.df$scores, scores.df$group,
                p.adjust.method = "bonf",
                pool.sd = TRUE, paired = FALSE)
# Tukey's Honest Significant Differences.
TukeyHSD(aov(scores ~ group, data = scores.df))
# Standardized mean differences (effect sizes) between each pair of
# group centroids, using the known group sizes (197, 199, 204).
library(MBESS)
smd(Group.1 = scores.df$scores[1:197], Group.2 = scores.df$scores[198:396])
smd(Group.1 = scores.df$scores[1:197], Group.2 = scores.df$scores[397:600])
smd(Group.1 = scores.df$scores[198:396], Group.2 = scores.df$scores[397:600])
# Cleaning up the workspace: detach the loaded packages and remove the
# objects created above, checking the search path and workspace before
# and after.
search()
for (pkg in c("package:MBESS", "package:MASS")) {
  detach(pkg, character.only = TRUE)
}
search()
ls()
rm(data.DFA, model.1.DFA, model.2.DFA, percent.correct, scores.df)
ls()
### REFERENCES AND RESOURCES:
#
# Hair, J. F., Anderson, R. E., Tatham, R. L., & Black, W. C. (1998). Multivariate Data
# Analysis (5th ed.). Upper Saddle River, NJ: Prentice Hall.
#
# Mertler, C. A., & Vannatta, R. A. (2002). Advanced and Multivariate Statistical Methods (2nd
# ed.). Los Angeles, CA: Pyrczak Publishing.
#
# Pedhazur, E. J. (1997). Multiple Regression in Behavioral Research: Explanation and
# Prediction (3rd ed.). Wadsworth/Thomson Learning (now Cengage Learning): United States.
#
# Tabachnick, B. G., & Fidell, L. S. (2001). Using Multivariate Statistics (4th ed.).
# Needham Heights, MA: Allyn & Bacon.
#
#
# End; Updated February 26, 2013.