#
#
############### Using the hetcor function in the polycor library ###############
#
# This script allows us to see the benefits of using the hetcor function to get
# the correct type of correlation for each type of variable pair, based on each
# variable's measurement scale (nominal, ordinal, interval/ratio).
# The hetcor function allows us to do this all at once.
# From the hetcor help documentation file, hetcor:
# Computes a heterogenous correlation matrix, consisting of Pearson
# product-moment correlations between numeric variables, polyserial
# correlations between numeric and ordinal variables, and polychoric
# correlations between ordinal variables.
# This script assumes you have worked through all the previous notes from
# the web page and you have downloaded, installed, and updated all available
# R packages.
# If not already loaded, load the 'foreign' library (to import an SPSS.sav data file).
# If not already loaded, load the 'foreign' library (to import an SPSS .sav data file).
library(foreign)
# Start by using the 'foreign' library to import 'TrialHet.sav' (available from the web
# page) assigning the name 'het.trial'.
het.trial <- read.spss("http://www.unt.edu/rss/class/Jon/R_SC/Module6/M6_TrialHet.sav",
  use.value.labels = TRUE, max.value.labels = Inf, to.data.frame = TRUE)
# Create a subset of the data to remove the "code" variable.
# NOTE: direct column subsetting replaces the original attach()/data.frame()/detach()
# sequence; attach() modifies the search path and can silently mask other objects,
# while this form produces the identical data frame without that risk.
subset1 <- het.trial[, c("x1", "x2", "x3", "extroversion", "openness",
                         "agreeable", "neuroticism")]
# Inspect the first 15 rows of the working data.
head(subset1, 15)
# Using the 'hetcor' function (in the 'polycor' library) to get the proper correlations as
# necessary for each variable pair (Pearson, polyserial, or polychoric, chosen per pair).
library(polycor)
# A quick example of what you get with the hetcor function (takes a minute or so).
# NOTE: hetcor() is called again just below to extract the matrix, so the expensive
# estimation runs twice; assigning this first call to an object would avoid that.
hetcor(subset1)
# Extracting just the correlation matrix and assigning it to an object (het.cor), for
# comparison further below (NOTE: it works, despite the warnings).
het.cor <- hetcor(subset1)$cor
# Getting normal (Pearson) correlations for comparison.
# Data must be in numeric matrix form for this.
# NOTE(review): apply() first coerces the data frame to a matrix, which turns any
# factor columns into their character labels; as.numeric() then parses those labels
# (yielding NA, with warnings, for non-numeric labels). This differs from
# sapply(subset1, as.numeric), which would return underlying factor codes instead,
# so the call is deliberately left as-is to preserve the original behavior.
tot.m <- apply(subset1,2,as.numeric)
norm.cor <- cor(tot.m)
# Getting the Robust (percentage bend [pb]) correlations for comparison.
# To install this package directly within R type: install.packages("WRS", repos="http://R-Forge.R-project.org")
library(WRS)
# Percentage bend correlations: pball() returns a list whose 'pbcorm' element
# holds the correlation matrix; beta = 0.2 requests a 20% bend.
pb.fit <- pball(tot.m, beta = 0.2)
pb.cor <- pb.fit$pbcorm
# COMPARISON of the three types of correlation matrices.
# Note that the percentage bend correlations are lower than the normal (Pearson)
# correlations because some extreme values, which actually strengthened the
# relationships, were down-weighted during the 20% bend.
print(pb.cor)
print(norm.cor)
print(het.cor)
# Simple scatterplot matrix, in which you can see that the first 3 variables
# (x1, x2, x3) are discrete.
pairs(tot.m)
############ Investigation of Non-Linearity.
# One common concern with social science data is the possibility of non-linearity
# among the variables.
# If we focus on just the numeric variables (extroversion, openness, agreeable,
# neuroticism) we will quickly see some evidence of non-linearity in the current
# data set.
# First, let's get a scatterplot matrix (use Rcmdr --> Graphs --> Scatterplot Matrix)
# of our numeric variables with smoothers.
library(car)
# NOTE: the original Rcmdr-generated call used pre-car-3.0 arguments
# (reg.line=lm, smooth=TRUE, span=0.5, diagonal='density'); car >= 3.0 expects
# list-style 'smooth' and 'diagonal' arguments instead. Linear regression lines
# are drawn by default.
scatterplotMatrix(~ extroversion + openness + agreeable + neuroticism,
                  data = subset1,
                  smooth = list(span = 0.5),
                  diagonal = list(method = "density"))
## TO SEE THIS SCATTERPLOT MATRIX, paste the following into your browser and hit the enter key:
## http://www.unt.edu/rss/class/Jon/R_SC/Module6/M6_TrialHetScatter.pdf
# Just looking at the scatterplot matrix, we can see some curvilinear relationships.
# We can empirically or formally test for non-linearity using Rcmdr; but first we must
# fit a model.
# In Rcmdr, go through Statistics --> Fit Models --> Linear regression...
# Then specify extroversion as the outcome and openness, agreeable, and
# neuroticism as predictors.
RegModel.1 <- lm(extroversion ~ openness + agreeable + neuroticism, data = subset1)
summary(RegModel.1)
# Now that we have a model specified in Rcmdr, we can go to Models --> Numerical Tests -->
# RESET test for nonlinearity...
# Which should produce the following syntax in Rcmdr.
# NOTE: the Rcmdr-generated 'pos=4' argument (search-path position) is an
# artifact of Rcmdr and is not needed when running this script directly.
library(lmtest)
resettest(extroversion~openness+agreeable+neuroticism, power=2:3,
type="regressor", data=subset1)
# The test provides an F-value and a p-value for the F-value. Here, the
# F-value is 2.0851 and the p-value is 0.05248.
# So, we might not be convinced about the presence of non-linearity; however,
# if we look at one (or a few) pairwise comparisons we might find evidence of
# non-linearity.
# Here, we specify a linear regression model with extroversion predicted by agreeable.
# Again, in Rcmdr.
RegModel.2 <- lm(extroversion~agreeable, data=subset1)
summary(RegModel.2)
# Then run the RESET test on this model (in Rcmdr).
resettest(extroversion~agreeable, power=2:3, type="regressor", data=subset1)
# Which provides the following output (p-value = .0000722); providing
# strong evidence for non-linearity.
# Example output (commented out so this script can be source()'d without a
# syntax error; the variable names below reflect the original version of these
# notes -- TODO confirm they match the current data set):
#   RESET test
#   data: attachment ~ income
#   RESET = 9.6257, df1 = 2, df2 = 1019, p-value = 7.222e-05
## For more information on the RESET test for nonlinearity:
## http://en.wikipedia.org/wiki/Ramsey_RESET_test
## End: Minor updates, Mar. 2, 2011.