###############################################################################
# Categorical Regression with Optimal Scaling
###############################################################################
#
# This script assumes you have worked through all the previous notes from
# the web page and that you have downloaded, installed, and updated all
# available R packages.
#
# Objects created in this part of the script:
#   catreg2     raw data as read from the SPSS file
#   catreg      analysis data: y and x1 - x5 only
#   reg1, reg2  OLS fits on the original data (raw / standardized)
#   opt.scale1  optimally scaled scores, every variable treated as ordinal
#   opt1        data frame built from the first dimension of opt.scale1
#   reg3, reg4  fits on the optimally scaled data (raw / standardized)

# Load the following libraries if you have not already.
library(foreign)
# NOTE(review): no Rcmdr function appears to be called in this script --
# confirm before dropping this dependency.
library(Rcmdr)

# The catreg2 data file contains 6 ordinal variables: y & x1 - x5.
# Use the 'foreign' library to load the "catreg2.sav" data file.
catreg2 <- read.spss(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module9/catreg2.sav",
  use.value.labels = TRUE,
  max.value.labels = Inf,
  to.data.frame = TRUE
)
summary(catreg2)

# Remove the 'code' variable by keeping only the outcome and the predictors.
# with() evaluates the expression inside catreg2 without putting the data
# frame on the search path, so nothing has to be attached or detached.
catreg <- with(catreg2, data.frame(y, x1, x2, x3, x4, x5))
summary(catreg)

# Get the OLS regression models for later comparison to the optimally scaled
# regression models. The data frame is passed explicitly through 'data =' so
# each model states which data it was fitted to.

# Raw regression model.
reg1 <- lm(y ~ x1 + x2 + x3 + x4 + x5, data = catreg)

# Standardized regression model.
reg2 <- lm(
  scale(y) ~ scale(x1) + scale(x2) + scale(x3) + scale(x4) + scale(x5),
  data = catreg
)

# Run the optimal scaling, specifying the level of each variable ("ordinal")
# and retaining the transformed (optimally scaled) data using the "$scoremat"
# identifier.

# Optimal Scaling (homals function in the homals library).
library(homals)
opt.scale1 <- homals(
  catreg,
  ndim = 1,
  level = "ordinal",
  sets = list(1, 2:6),
  itermax = 10000
)$scoremat

# Create a data frame (for the regression) from the first dimension of the
# optimally scaled data.
opt1 <- data.frame(opt.scale1[, , 1])

# Run the regressions on the optimally scaled data.

# Non-standardized model based on optimally scaled (transformed) data.
reg3 <- lm(y ~ x1 + x2 + x3 + x4 + x5, data = opt1)

# Standardized regression model based on optimally scaled (transformed) data.
reg4 <- lm(
  scale(y) ~ scale(x1) + scale(x2) + scale(x3) + scale(x4) + scale(x5),
  data = opt1
)

# Compare the regression models (note R-squared and adjusted R-squared).

# Non-standardized (raw).
summary(reg1)
summary(reg3)

# Standardized.
summary(reg2)
summary(reg4)

# Given the wide range of values for the outcome variable (y), we could also
# run the optimal scaling specifying y as numeric and the other variables as
# ordinal.
summary(catreg)

# Run the optimal scaling, this time specifying y as numeric and the other 5
# variables as ordinal. The 'level' vector has one entry per column of catreg,
# in column order (y first, then x1 - x5).
opt.scale2 <- homals(
  catreg,
  ndim = 1,
  level = c(
    "numerical", "ordinal", "ordinal", "ordinal", "ordinal", "ordinal"
  ),
  sets = list(1, 2:6),
  itermax = 10000
)$scoremat

# Create a data frame (for the regression) from the first dimension of the
# optimally scaled data.
opt2 <- data.frame(opt.scale2[, , 1])

# Run the regressions on the optimally scaled data. The data frame is passed
# explicitly through 'data =' instead of being attached to the search path,
# so the variables cannot be masked by objects of the same name elsewhere.

# Non-standardized.
reg5 <- lm(y ~ x1 + x2 + x3 + x4 + x5, data = opt2)

# Standardized.
reg6 <- lm(
  scale(y) ~ scale(x1) + scale(x2) + scale(x3) + scale(x4) + scale(x5),
  data = opt2
)

# Comparisons

# Original data with OLS regression, all variables numeric
summary(reg1)
# Optimally Scaled data with categorical regression, all variables ordinal
summary(reg3)
# Optimally Scaled data with categorical regression, numeric outcome,
# ordinal predictors
summary(reg5)

# Standardized all variables numeric, OLS reg.
summary(reg2)
# Standardized all variables ordinal, cat reg.
summary(reg4)
# Standardized numeric outcome, ordinal predictors, cat reg.
summary(reg6)

############### REFERENCES / RESOURCES ###############
#
# See the 'homals' help file (package documentation):
#   http://www.unt.edu/rss/class/Jon/R_SC/Module9/homals.pdf
#
# See the 'homals vignette':
#   http://www.unt.edu/rss/class/Jon/R_SC/Module9/homals_vignette.pdf
#
# See the 'homals in R':
#   http://www.unt.edu/rss/class/Jon/R_SC/Module9/homalsR.pdf
#
# Van Der Geer, J. P. (1993). Multivariate analysis of categorical data:
#   Theory. Advanced Quantitative Techniques in the Social Sciences Series
#   (Vol. 2). Sage Publications, Inc.
#
# Van Der Geer, J. P. (1993). Multivariate analysis of categorical data:
#   Applications. Advanced Quantitative Techniques in the Social Sciences
#   Series (Vol. 3). Sage Publications, Inc.