# Example: impute missing values with a PCA model, then cluster the
# *variables* of the completed data hierarchically. This is purely an
# example to show the ease of missing value imputation.

library(pcaMethods) # pca() with missing-value-aware methods
library(psych)      # has the data
library(cluster)    # agnes()

# Load the data ----
# NOTE(review): newer psych releases are reported to ship `bfi` in the
# psychTools package instead; confirm against the installed version.
data(bfi)
# Alternatively one can download it:
# bfi <- read.table(
#   "http://www.unt.edu/rss/class/mike/data/bfi.txt",
#   header = TRUE
# )

# Impute missing values ----
# Fit a 5-component PCA with method "bpca"; the fitted object carries a
# completed copy of the data in its `completeObs` slot.
pcamod <- pca(bfi, nPcs = 5, method = "bpca")
# summary(pcamod) # if you want to look at the pca output

bfi2 <- data.frame(pcamod@completeObs) # the now completed dataset
bfi3 <- t(bfi2) # transpose so that we cluster variables, not respondents

# Cluster the variables ----
# `bfi3` is a local object, not a packaged dataset, so no data() call is
# needed (or valid) before using it.
agn1 <- agnes(bfi3, method = "ward") # agnes = agglomerative nesting

# AC is a quantity varying between 0 and 1. AC close to 1 indicates that
# a very clear structuring has been found. AC close to 0 indicates that
# the algorithm has not found a natural structure; in other words, the
# data consist of only one big cluster.
agn1

# I personally don't care for the banner plot other than that it shows
# clean cluster breaks. This is interesting to compare to a factor
# analysis: they serve different purposes, but there are similarities in
# the results. For example, FA would show poor loading for A1 and good
# loading for the Ns.
plot(agn1)

# Cluster membership ----
cutree(agn1, 2) # 2 for a 2-cluster solution

# Dataset with just the variables (five factor attribute) and their
# cluster membership.
classmemb <- data.frame(FFA = rownames(bfi3), class = cutree(agn1, 2))
classmemb

# Factor analysis comparison ----
# Remove the comment marks to run.
# FAmod <- factanal(
#   ~ A1 + A2 + A3 + A4 + A5 + C1 + C2 + C3 + C4 + C5 +
#     E1 + E2 + E3 + E4 + E5 + N1 + N2 + N3 + N4 + N5 +
#     O1 + O2 + O3 + O4 + O5,
#   factors = 5, rotation = "promax", scores = "none", data = bfi2
# )
# print(FAmod, digits = 2, cutoff = .3)
# FAmod$factors