#
# ############### Discriminant Function Analysis ###############
#
# This script assumes you have worked through all the previous notes from
# the web page and you have downloaded, installed, and updated all available
# R packages.
#
# This part of the script loads the example data, fits a linear discriminant
# model with and without leave-one-out (jackknifed) prediction, reports
# classification accuracy, and extracts the first-function scores together
# with their per-group centroids and standard deviations.

# Load the example data.

data.DFA <- read.table(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module6/DFA_df.txt",
  header = TRUE, sep = ",", na.strings = "NA", dec = ".", strip.white = TRUE
)
summary(data.DFA)

# Next, change the outcome variable (which is currently numeric) to a factor.

data.DFA$y <- as.factor(data.DFA$y)
summary(data.DFA)

# Linear Discriminant Analysis with Jackknifed Prediction

library(MASS)

# Model 1: ordinary fit (CV = FALSE) -- returns the fitted "lda" object.
model.1.DFA <- lda(y ~ x1 + x2 + x3, data = data.DFA,
                   na.action = "na.omit", CV = FALSE)
model.1.DFA

# Model 2: leave-one-out cross-validation (CV = TRUE) -- returns the
# cross-validated classifications ($class) and posterior probabilities.
model.2.DFA <- lda(y ~ x1 + x2 + x3, data = data.DFA,
                   na.action = "na.omit", CV = TRUE)
model.2.DFA

# Assess the accuracy of the prediction: percent correct for each category
# of y (rows = observed group, columns = cross-validated predicted group).

percent.correct <- table(data.DFA$y, model.2.DFA$class)
diag(prop.table(percent.correct, 1))

# Total percent correct

sum(diag(prop.table(percent.correct)))

# Classification of multivariate observations (the following includes three
# elements: $class, $posterior, $x -- the $x contains the function scores
# for each row on each function).

predict(model.1.DFA)

# To extract just the function scores from the first function of Model 1 and
# assign them to an object ("scores").

scores <- predict(model.1.DFA)$x[, 1]
scores

# The group sizes are shown by summary(). Descriptives of the function
# scores, such as the Centroids (means) and the standard deviations, are
# computed per level of y rather than by fixed row ranges, so the results do
# not depend on the rows being sorted by group or on the exact group sizes.

summary(data.DFA$y)

# The grouped summaries below require one score per row of data.DFA; stop
# early if rows were dropped (e.g. by na.omit) and the two no longer align.
stopifnot(length(scores) == nrow(data.DFA))

tapply(scores, data.DFA$y, mean)
tapply(scores, data.DFA$y, sd)

# To continue analyzing the Centroids, we combine the scores into a data frame.
# Pair each first-function score with its group label. The grouping variable
# is taken from column 2 of data.DFA.
# NOTE(review): column 2 is presumably the outcome y -- confirm against the
# data file.

scores.df <- data.frame(data.DFA[, 2], scores)
rm(scores)
names(scores.df)[1] <- "group"
head(scores.df, 25)

# A oneway Analysis of Variance can then be run on the scores; two types of
# post-hoc testing are shown.

summary(aov(scores ~ group, data = scores.df))

# Argument and method names are spelled out in full rather than relying on
# partial matching ("p.adj" / "bonf").
pairwise.t.test(scores.df$scores, scores.df$group,
                p.adjust.method = "bonferroni",
                pool.sd = TRUE, paired = FALSE)

TukeyHSD(aov(scores ~ group, data = scores.df))

# Effect sizes for the differences between centroids. The scores are split
# by group label instead of by fixed row ranges, so the comparisons stay
# correct regardless of row order or group sizes. Groups are ordered as
# split() returns them (factor level order when group is a factor).

library(MBESS)

scores.by.group <- split(scores.df$scores, scores.df$group)

smd(Group.1 = scores.by.group[[1]], Group.2 = scores.by.group[[2]])
smd(Group.1 = scores.by.group[[1]], Group.2 = scores.by.group[[3]])
smd(Group.1 = scores.by.group[[2]], Group.2 = scores.by.group[[3]])

# Cleaning up the workspace.

search()
detach("package:MBESS")
detach("package:MASS")
search()

ls()
rm(data.DFA, model.1.DFA, model.2.DFA, percent.correct, scores.df,
   scores.by.group)
ls()

### REFERENCES AND RESOURCES:
#
# Hair, J. F., Anderson, R. E., Tatham, R. L., & Black, W. C. (1998). Multivariate Data
#   Analysis (5th ed.). Upper Saddle River, NJ: Prentice Hall.
#
# Mertler, C. A., & Vannatta, R. A. (2002). Advanced and Multivariate Statistical Methods (2nd
#   ed.). Los Angeles, CA: Pyrczak Publishing.
#
# Pedhazur, E. J. (1997). Multiple Regression in Behavioral Research: Explanation and
#   Prediction (3rd ed.). Wadsworth/Thomson Learning (now Cengage Learning): United States.
#
# Tabachnick, B. G., & Fidell, L. S. (2001). Using Multivariate Statistics (4th ed.).
#   Needham Heights, MA: Allyn & Bacon.
#
#
# End; Updated February, 26, 2013.