# # ############### Using the hetcor function in the polycor library ############### # # This script allows us to see the benefits of using the hetcor function to get # the correct type of correlation for each type of variable pair, based on each # variable's measurement scale (nominal, ordinal, interval/ratio). # The hetcor function allows us to do this; all at once. # From the hetcor help documentation file, hetcor: # Computes a heterogenous correlation matrix, consisting of Pearson # product-moment correlations between numeric variables, polyserial # correlations between numeric and ordinal variables, and polychoric # correlations between ordinal variables. # This script assumes you have worked through all the previous notes from # the web page and you have downloaded, installed, and updated all available # R packages. # If not already loaded, load the 'foreign' library (to import an SPSS.sav data file). library(foreign) # Start by using the 'foreign' library to import 'TrialHet.sav' (available from the web # page) assigning the name 'het.trial'. het.trial <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module6/M6_TrialHet.sav", use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE) attach(het.trial) # Create a subset of the data to remove the "code" variable. subset1 <- data.frame(x1,x2,x3,extroversion,openness,agreeable,neuroticism) detach(het.trial) head(subset1, 15) # Using the 'hetcor' function (in the 'polycor' library) to get the proper correlations as # necessary for each variable pair. library(polycor) # A quick example of what you get with the hetcor function (takes a minute or so). hetcor(subset1) # Extracting just the correlation matrix and assigning it to an object (het.cor), for # comparison further below (NOTE: it works, despite the warnings). het.cor <- hetcor(subset1)$cor # Getting normal (Pearson) correlations for comparison. # Data must be in numeric matrix form for this. tot.m <- apply(subset1,2,as.numeric) norm.cor <- cor(tot.m) # Getting the Robust (percentage bend [pb]) correlations for comparison. # To install this package directly within R type: install.packages("WRS", repos="http://R-Forge.R-project.org") library(WRS) # Percentage bend correlations. pb.cor <- pball(tot.m, beta = 0.2)$pbcorm # COMPARISON of the three types of correlation matrices. # Note that the Percentage bend correlations are lower than the normal correlations # because, some extreme values which actually strengthened the relationships # were removed during the 20% trim. pb.cor norm.cor het.cor # Simple scatterplot matrix; in which you can see the first 3 variables (x1,x2,x3) are discrete. pairs(tot.m) ############ Investigation of Non-Linearity. # One common concern with social science data is the possibility of non-linearity # among the variables. # If we focus on just the numeric variables (attachment,selfcon,income,depression); # we will quickly see some evidence of non-linearity in the current data set. # First, let's get a scatterplot matrix (use Rcmdr --> Graphs --> Scatterplot Matrix) # of our numeric variables with smoothers. The following syntax should be produced in # Rcmdr. library(car) scatterplotMatrix(~extroversion+openness+agreeable+neuroticism, reg.line=lm, smooth=TRUE, span=0.5, diagonal = 'density', data=subset1) ## TO SEE THIS SCATTERPLOT MATRIX, paste the following into your browser and hit the enter key: ## http://www.unt.edu/rss/class/Jon/R_SC/Module6/M6_TrialHetScatter.pdf # Just looking at the scatterplot matrix, we can see some curvilinear relationships. # We can empirically or formally test for non-linearity using Rcmdr; but first we must # fit a model. # In Rcmdr, go through Statistics --> Fit Models --> Linear regression... # Then specify Attachment as the outcome and depression, income, selfcon as predictors. RegModel.1 <- lm(extroversion~openness+agreeable+neuroticism, data=subset1) summary(RegModel.1) # Now, we have a model specified in Rcmdr, we can go to Models --> Numerical Tests --> # RESET test for nonlinearity... # Which should produce the following syntax in Rcmdr. library(lmtest, pos=4) resettest(extroversion~openness+agreeable+neuroticism, power=2:3, type="regressor", data=subset1) # The test provides an F-value and a p-value for the F-value. Here, the # F-value is 2.0851 and the p-value is 0.05248. # So, we might not be convinced about the presence of non-linearity; however, # if we look at one (or a few) pairwise comparisons we might find evidence of # non-linearity. # Here, we specify a linear regression model with attachment predicted by income. # Again, in Rcmdr. RegModel.2 <- lm(extroversion~agreeable, data=subset1) summary(RegModel.2) # Then run the RESET test on this model (in Rcmdr). resettest(extroversion~agreeable, power=2:3, type="regressor", data=subset1) # Which provides the following output (p-value = .0000722); providing # strong evidence for non-linearity. RESET test data: attachment ~ income RESET = 9.6257, df1 = 2, df2 = 1019, p-value = 7.222e-05 ## For more information on the RESET test for nonlinearity: ## http://en.wikipedia.org/wiki/Ramsey_RESET_test ## End: Minor updates, Mar. 2, 2011.