#
#
############ Basic Multidimensional Scaling ############
#
#
#
# This script assumes you have worked through all the previous notes from
# the web page and you have downloaded, installed, and updated all available
# R packages.

# Load the following libraries if you have not already.
# NOTE(review): the visible part of this script only calls functions from
# stats, foreign, MASS and scatterplot3d; Rcmdr is kept because the original
# script loads it (and a later comment relies on it to load MASS).

library(Rcmdr)
library(foreign)

# Multidimensional scaling is used to discover the underlying structure of
# distance measures between objects or cases. Essentially, MDS assigns
# observations to specific locations in a conceptual space (usually 2 or 3
# dimensional space) such that the distances between points in the space match
# the given dissimilarities as closely as possible. MDS is similar to but
# sometimes preferred over factor analysis because MDS does not rely on most
# common assumptions (linearity, multivariate normality, etc.). In fact, the
# only assumptions of MDS are the number of dimensions cannot exceed the number
# of objects minus one; which also means at least three variables must be
# entered in the model and at least two dimensions must be specified.

# Typically, there are two types of MDS:
# (1) classical MDS which involves numeric data (preferably variables in the
#     same scale), which uses the 'cmdscale' function (in the stats library --
#     which is included with the base install of R) and
# (2) nonmetric MDS which involves data which is not necessarily all numeric,
#     which uses the 'isoMDS' function (MASS library).
#
# Data can be passed to R functions in the form of proximity matrices or
# variables matrices (including correlation matrices) which are converted into
# proximity matrices (using the 'dist' function).

############ Classical MDS ############

### Example 1.

# One way to familiarize oneself with MDS is by using the help documentation
# example from the 'cmdscale' function.
# This example uses the 'eurodist' data available in the datasets package
# (which, like the stats library, auto loads when you start R).

# Load the 'eurodist' data.

data(eurodist)
eurodist

# Run the MDS.
euro.mds <- cmdscale(eurodist)
euro.mds

# If you would like the full output, including Goodness-of-fit (GOF); add the
# 'eig = TRUE' statement.

eur.mds <- cmdscale(eurodist, eig = TRUE)
eur.mds

# Assign names (dimension numbers) to the result vectors.

Dim1 <- euro.mds[, 1]
Dim2 <- euro.mds[, 2]

# Plot the solution: an empty plot (type = "n"), dotted reference lines
# through the origin, then the row names of the solution as point labels.

plot(Dim1, Dim2, type = "n", xlab = "", ylab = "", main = "cmdscale(eurodist)")
segments(-1500, 0, 1500, 0, lty = "dotted")
segments(0, -1500, 0, 1500, lty = "dotted")
text(Dim1, Dim2, rownames(euro.mds), cex = 0.8)

# Same plot as above; but with different markers and color.

plot(Dim1, Dim2, xlab = "", ylab = "", main = "cmdscale(eurodist)")
segments(-1500, 0, 1500, 0, lty = "dotted")
segments(0, -1500, 0, 1500, lty = "dotted")
text(Dim1, Dim2, rownames(euro.mds), cex = 0.8, col = "red")

### Example 2.

# Use the 'foreign' library to import the 'kinship_dat.sav' SPSS data file.
# This data comes from the PASW Categories 18 module.
# The call is namespace-qualified so it works whether or not 'foreign' has
# been attached with library().

kinship.1 <- foreign::read.spss(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module9/MDS/kinship_dat.sav",
  use.value.labels = TRUE,
  max.value.labels = Inf,
  to.data.frame = TRUE
)

# Notice the kinship data is in a multiple-source-matrix format.

kinship.1

# Use only the first source (first 15 rows) without the 'sourceid' variable.

kinship.2 <- kinship.1[1:15, 1:15]
kinship.2

# Put the kinship data into a distance matrix format.
# NOTE(review): dist() computes distances between the rows of kinship.2. If
# those 15 x 15 values are already proximities (as the multiple-source-matrix
# comment above suggests), as.dist(kinship.2) may be the intended conversion --
# confirm against the data before changing.

kin.dist <- dist(kinship.2)
kin.dist

# Apply the MDS analysis using the 'cmdscale' function and assigning to an
# object (mds2); by default the function returns a 2 dimensional solution.

mds2 <- cmdscale(kin.dist)
mds2 # Returns the 2 dimensional vector values.

# Apply the MDS analysis specifying a 3 dimensional solution.

mds3 <- cmdscale(kin.dist, k = 3)
mds3 # Returns the 3 dimensional vector values.

# Simple two dimensional solution plot.

plot(mds2)

# Store the two solution columns under dimension names. This overwrites the
# Dim1 / Dim2 vectors created in Example 1.

Dim1 <- mds2[, 1]
Dim2 <- mds2[, 2]

# Plot with correct labels.
plot(Dim1, Dim2, type = "n", xlab = "", ylab = "", main = "cmdscale(kin.dist)")
segments(-1500, 0, 1500, 0, lty = "dotted")
segments(0, -1500, 0, 1500, lty = "dotted")
text(Dim1, Dim2, colnames(kinship.2), cex = 0.8, col = "red")

# Three dimensional solution plot.

library(scatterplot3d)
scatterplot3d(
  mds3,
  color = "dark blue",
  pch = 1,
  main = "Multidimensional Scaling 3-D Plot",
  sub = "Three Dimensional Solution",
  grid = TRUE,
  box = TRUE
)
mds3

############ NonMetric MDS ############

# Load the MASS package/library if it is not already loaded (loads with Rcmdr).

library(MASS)

# Load the data used in the help file for the 'isoMDS' function.

data(swiss)
summary(swiss)
swiss
nrow(swiss)

# Convert the data into a distance matrix.

swiss.dist <- dist(swiss)

# Run the MDS on the distance data with the default 2-dimension solution.

swiss.mds <- isoMDS(swiss.dist)
swiss.mds

# Display just the points (dimension values).

swiss.mds$points
summary(swiss.mds$points)

# Basic Plot: empty axes, then each case labelled with its row number.

plot(swiss.mds$points, type = "n")
text(swiss.mds$points, labels = as.character(seq_len(nrow(swiss))))

# Slightly more complex plot: dotted reference lines through the origin and
# each case labelled with its row name.

plot(swiss.mds$points, type = "n")
segments(-75, 0, 55, 0, lty = "dotted")
segments(0, -75, 0, 35, lty = "dotted")
text(swiss.mds$points, labels = row.names(swiss), col = "red")

################################## REFERENCES & RESOURCES ##################################

# The 'dist' function:
#   http://sekhon.berkeley.edu/stats/html/dist.html
# Using the 'cmdscale' function for Classical MDS:
#   http://sekhon.berkeley.edu/stats/html/cmdscale.html
# Using the 'isoMDS' function for Nonmetric MDS:
#   http://stat.ethz.ch/R-manual/R-patched/library/MASS/html/isoMDS.html
# Brief tutorial on MDS at Quick-R:
#   http://www.statmethods.net/advstats/mds.html
# Brief tutorial on MDS:
#   http://www.unt.edu/rss/class/Jon/R_SC/Module9/MDS/Ch_multidimensional_scaling.pdf
# MASS library documentation in 'pdf':
#   http://www.unt.edu/rss/class/Jon/R_SC/LibraryDocumentation/MASS.pdf