# # ################ Correspondence Analysis Examples ################ # # This script assumes you have worked through all the previous notes from # the web page and you have downloaded, installed, and updated all available # R packages. # Load the following libraries if you have not already; if an additional # library is needed, it will be listed in the script and loaded there. library(foreign) library(Rcmdr) ###### Simiple (2 variable) Correspondence Analysis ###### # We will be reproducing the Correspondence Analysis David Garson has on his web page # using the GSS Subset data which comes with every installation of SPSS. # (for his example, see: http://faculty.chass.ncsu.edu/garson/PA765/correspond_SPSS.HTM) # Start by importing the 'GSS93 subset.sav' data file from the web using the # 'foreign' library to import the 'GSS93 subset.sav' data file assigning the # name 'Dataset'. Dataset <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module7/GSS93subset.sav", use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE) attach(Dataset) summary(Dataset) # Do a cross-tabulation and get chi-square. table1 <- xtabs(~region4+politics, data=Dataset) table1 # Optional chi-square test. test1 <- chisq.test(table1, correct=FALSE) test1 # Load the 'ca' library; Correspondence Analysis. library(ca) # Run the correspondence analysis with the 'ca' function. ca(table1) # Plot the correspondence map. plot(ca(table1)) # Names of elements of the output. names(ca(table1)) # House cleaning. ls() detach(Dataset) rm(Dataset,table1,test1) ls() ################################################################## # The next few examples will use the "IntroPsych_Fall2009.txt" data set (which is fictional). # Next, import the data from the web and assigning the name 'Dataset2'. Dataset2 <- read.table("http://www.unt.edu/rss/class/Jon/R_SC/Module7/IntroPsych_Fall2009.txt", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) attach(Dataset2) summary(Dataset2) ## First example. # Get a cross-tabulation of the variables 'class_standing' and 'final_grade'. t1 <- table(class_standing, final_grade) t1 # Run the correspondence analysis with the 'ca' function. ca(t1) # Plot the map. plot(ca(t1)) ## Second example. # Get a cross-tabulation of the variables 'class_standing' and 'final_grade'. t2 <- table(family_income, final_grade) t2 # Run the correspondence analysis with the 'ca' function. ca(t2) # Plot the map. plot(ca(t2)) ###### Multiple Correspondence Analysis ###### subset <- data.frame(family_income, class_standing, final_grade) detach(Dataset2) attach(subset) summary(subset) # Inspect the cross-tabulation tables. t3 <- table(family_income, class_standing) t1 t2 t3 # Use library/package 'ca' [library(ca)] to # Conduct the Multiple (and Joint) Correspondence Analysis with the 'mjca' function. mjca(subset) # Multiple Correspondence Analysis plot. Keep in mind; these plots can be muddled or confusing # with large samples (as is the case here) because, the case numbers overlap one another. plot(mjca(subset)) # Same plot, but without the individual point labels. plot(mjca(subset), labels = c(0, 2)) # Same plot but with different colors / symbols. plot(mjca(subset), col = c("blue", "red"), pch = c(16, 0, 17, 0),, labels = c(0, 2)) plot(mjca(subset), col = c("black", "red"), pch = c(21, 0, 21, 0),, labels = c(0, 2)) rm(t1,t2,t3) ###### More plotting ###### # Create a new data frame from the dimension scores. mds1 <- data.frame(mjca(subset)$colcoord[c(1:13),c(1:2)]) mds1 detach(subset) # Rename the columns/variables to their true dimension names. names(mds1) [1] <- "dim1" names(mds1) [2] <- "dim2" # Add the row names. row.names(mds1) <- c("0-25", "26-50", "51-75", "76-inf","Freshman","Junior","Senior","Sophomore","A","B","C","D","F") mds1 attach(mds1) # Plot with correct (readable) labels. plot(mds1,ylim=c(-2.5,2.5),xlim=c(-2.5,2.5),col="red") segments(-3, -0, 3, 0, lty="dotted") segments(-0, -3, 0, 3, lty="dotted") text(x=dim1,y=dim2,labels=row.names(mds1),pos=4) detach(mds1) ############################################################################################ # Using Analysis of Variance with number grades. attach(Dataset2) AnovaModel.1 <- (lm(number_grade ~ class_standing*family_income, data=Dataset2)) Anova(AnovaModel.1) tapply(Dataset2$number_grade, list(class_standing=Dataset2$class_standing, family_income=Dataset2$family_income), mean, na.rm=TRUE) # means tapply(Dataset2$number_grade, list(class_standing=Dataset2$class_standing, family_income=Dataset2$family_income), sd, na.rm=TRUE) # std. deviations tapply(Dataset2$number_grade, list(class_standing=Dataset2$class_standing, family_income=Dataset2$family_income), function(x) sum(!is.na(x))) # counts # Grand mean: mean(number_grade) # Plot of means from ANOVA Model 1. plotMeans(number_grade, class_standing, family_income, error.bars="se") ############################################################################################ set1 <- data.frame(IQ,drinks_week,number_grade) set2 <- data.frame(confidence, hardworker,number_grade) set3 <- data.frame(age,IQ,number_grade) library(scatterplot3d) cor(set1) scatterplot3d(set1, color="purple", pch=23, main="Grades by IQ & Drinks per Week", sub="Introduction to Psychology Undergraduates", grid=TRUE, box=TRUE) cor(set2) scatterplot3d(set2, color="red", pch=22, main="Grades by Confidence & Hardworker", sub="Introduction to Psychology Undergraduates", grid=TRUE, box=FALSE) cor(set3) scatterplot3d(set3, color="blue", pch=21, main="Grades by Age & IQ", sub="Introduction to Psychology Undergraduates", grid=FALSE, box=FALSE) detach(Dataset2) summary(subset) scatterplot3d(subset, color="dark blue", pch=1, main="Letter Grades by Family Income & Class Standing", sub="Introduction to Psychology Undergraduates", grid=TRUE, box=TRUE, xlim=c(0,5), ylim=c(0,5),zlim=c(0,6))