#
#
####### Variable Conversions #######
#
#
# Tutorial script: shows how to convert variables between factor, ordered
# factor and numeric types, first one variable at a time and then several
# columns at once. Intended to be run top to bottom; printed results are the
# point of most statements.

# Import an example data file.
# NOTE: the file is read from a URL, so this step needs network access.

library(foreign)
het.trial <- read.spss(
  "http://www.unt.edu/rss/class/Jon/R_SC/Module6/M6_TrialHet.sav",
  use.value.labels = TRUE, max.value.labels = Inf, to.data.frame = TRUE
)
detach("package:foreign")

# Creating a smaller (more manageable) example data frame.
# with() evaluates the column names inside het.trial, so nothing has to be
# attach()ed to the search path (where global variables of the same name
# would silently mask the data).

subset1 <- with(
  het.trial,
  data.frame(x1, x2, x3, extroversion, openness, agreeable, neuroticism)
)
rm(het.trial)

# Notice, the first three variables (x1, x2, x3) of subset1 are factors; while
# the other four variables are numeric.

summary(subset1)
is.numeric(subset1[, 1])
is.factor(subset1[, 1])
is.numeric(subset1$x2)
is.factor(subset1$x2)
is.numeric(subset1[, 3])
is.factor(subset1[, 3])
is.numeric(subset1[, 4])
is.factor(subset1[, 4])

# Convert a single variable from factor to numeric (there are several ways to
# do this).
# NOTE: as.numeric() on a factor returns the integer level codes (1, 2, ...),
# not the level labels. If the labels themselves are numbers, use
# as.numeric(as.character(f)) to recover them instead.

new.x1 <- as.numeric(subset1[, 1])
is.numeric(new.x1)

new.x2 <- as.numeric(subset1$x2)
is.numeric(new.x2)

# Element-wise variant; vapply() guarantees a numeric vector is returned.
new.x3 <- vapply(subset1[, 3], as.numeric, numeric(1))
is.numeric(new.x3)

# Going from numeric to factor is the same.

old.x3 <- as.factor(new.x3)
is.factor(old.x3)

# For those who favor Likert scales, you can also specify a variable as
# 'ordered' which gives it the properties of an ordinal scaled factor; it is
# important to note that 'ordered' is simply an attribute of a factor. A
# factor variable may be ordered (i.e. ordinal) or not (i.e. categorical).

ord.x3 <- ordered(old.x3)
is.factor(ord.x3)
is.numeric(ord.x3)
is.ordered(ord.x3)

# One can also convert numeric to an "ordered" factor in a single step by
# using the optional argument 'ordered' of the 'factor' function (note that
# 'as.factor' itself does not have this argument).

z <- 1:4
z <- as.numeric(sample(z, 100, replace = TRUE))
summary(z)
z
is.numeric(z)

z.ord <- factor(
  z,
  levels = c(1, 2, 3, 4),
  labels = c("strongly disagree", "disagree", "agree", "strongly agree"),
  ordered = TRUE
)
summary(z.ord)
z.ord
is.factor(z.ord)
is.ordered(z.ord)
rm(z, z.ord)

### Converting more than one variable from factor (or other type[s]) to numeric.

# One way is to use the 'data.matrix' function; the 'data.matrix' function
# returns a numeric matrix in which factors are replaced by their integer
# level codes.

subset2 <- data.matrix(subset1)
summary(subset1)
summary(subset2)
is.numeric(subset2[, 1])
is.factor(subset2[, 1])
is.numeric(subset2[, 2])
is.factor(subset2[, 2])
is.numeric(subset2[, 3])
is.factor(subset2[, 3])
is.numeric(subset2[, 4])
is.factor(subset2[, 4])

# Another way to accomplish this is to apply 'as.numeric' to every column
# with 'vapply'. (Do not use apply() on the data frame for this: apply()
# first coerces the whole data frame to a character matrix, so factor labels
# would be turned into NA by as.numeric() instead of into level codes.)

subset3 <- vapply(subset1, as.numeric, numeric(nrow(subset1)))
summary(subset1)
summary(subset3)
is.numeric(subset3[, 1])
is.factor(subset3[, 1])
is.numeric(subset3[, 2])
is.factor(subset3[, 2])
is.numeric(subset3[, 3])
is.factor(subset3[, 3])
is.numeric(subset3[, 4])
is.factor(subset3[, 4])

# Going back to factors from numeric, first three columns of subset3 [1:3].
# First, convert the data matrix (subset3) to a data.frame.

subset3 <- data.frame(subset3)

# lapply() keeps every converted column a real factor; apply() would collapse
# the result into a character matrix, which data.frame() no longer turns into
# factors (stringsAsFactors defaults to FALSE since R 4.0).
subset4 <- lapply(subset3[, 1:3], as.factor)

# Re-attach all of the remaining (numeric) columns, not just columns 4 to 6,
# so that no variable is silently dropped.
subset4 <- data.frame(subset4, subset3[, -(1:3), drop = FALSE])
summary(subset3)
summary(subset4)
is.numeric(subset4[, 1])
is.factor(subset4[, 1])
is.numeric(subset4[, 2])
is.factor(subset4[, 2])
is.numeric(subset4[, 3])
is.factor(subset4[, 3])
is.numeric(subset4[, 4])
is.factor(subset4[, 4])

# Cleaning up the workspace.

ls()
rm(new.x1, new.x2, new.x3, old.x3, ord.x3, subset1, subset2, subset3, subset4)
ls()

# END; Mar. 6, 2012.