#
# ############### Graphical Parameters ###############
#

# Reference material: the help page for par() and the list of built-in
# color names.
help(par)
colors()

############### Some data ###############

# Need package 'car' for the recode function.
library(car)

# Simulated sample of n cases: an id, two categorical variables (sex, level),
# an integer age, three normal predictors and an outcome y. y is built from
# x1 and x2 plus noise; the coefficient on x3 is zero.
n <- 100
id <- seq_len(n)
sex <- sample(1:2, n, replace = TRUE)
sex <- recode(sex, "1 = 'Female'; 2 = 'Male'")
level <- sample(1:4, n, replace = TRUE)
level <- recode(level, "1 = 'Freshman'; 2 = 'Sophomore'; 3 = 'Junior'; 4 = 'Senior'")
age <- sample(18:25, n, replace = TRUE)
x1 <- rnorm(n, 10, 2)
x2 <- rnorm(n, 10, 1)
x3 <- rnorm(n, 12, 3)
e <- rnorm(n, 0, 2)
y <- -1.5 + 1.2 * x1 + 0.8 * x2 + 0 * x3 + e

# Keep only the data frame; the loose vectors are removed from the workspace.
df.1 <- data.frame(id, sex, level, age, x1, x2, x3, y)
rm(n, id, sex, level, age, x1, x2, x3, e, y)
summary(df.1)
ls()

# Columns 4 to 8 are age, x1, x2, x3 and y.
cor(df.1[4:8])
mod <- lm(y ~ x1 + x2 + x3, data = df.1)
summary(mod)

# Kept so that code referring to the columns by bare name continues to work;
# the plots in this section pass the data explicitly instead.
attach(df.1)

############### Graphing. ###############

# Basic regression diagnostics on a 2 x 2 layout with three lines of outer
# margin at the top; the previous settings are restored afterwards.
oldpar <- par(oma = c(0, 0, 3, 0), mfrow = c(2, 2))
plot(mod)
par(oldpar)

# Basic (boring) scatterplot.
plot(y ~ x1, data = df.1)

# Basic regression diagnostics (again), for the single-predictor model.
mod.x1 <- lm(y ~ x1, data = df.1)
summary(mod.x1)
oldpar <- par(oma = c(0, 0, 3, 0), mfrow = c(2, 2))
plot(mod.x1)
par(oldpar)

# A way-too-busy scatterplot, which shows some of the optional arguments and
# parameters.
par(bg = "lightgray")
plot(y ~ x1, data = df.1, pch = 3, cex = 2, col = "blue",
     xlim = c(6, 15), ylim = c(12, 25))
abline(reg = mod.x1, col = "green", lwd = 2, lty = "solid")

# The lowess smooth is computed once and reused below.
smooth.x1 <- with(df.1, lowess(x1, y, f = 0.2))

# par(new = TRUE) makes the next plot() draw over the current one. The same
# limits keep the two coordinate systems aligned; annotation and axes are
# switched off so they do not overprint those of the first plot.
par(new = TRUE)
plot(smooth.x1$x, smooth.x1$y, lty = 2, col = "red",
     xlim = c(6, 15), ylim = c(12, 25), ann = FALSE, axes = FALSE)

# grid() and box() are separate calls: the first argument of box() selects
# which box to draw, so it must not receive the value returned by grid().
grid(col = "black", lty = 2)
box()
par(bg = "white")

# Plotting the lowess function (only) from above.
plot(y ~ x1, data = df.1)
lines(smooth.x1)

# Basic scatterplot matrix.
pairs(df.1[4:8])

## Complex Scatter matrix w/correlations in the upper panel where the font
## size of each correlation corresponds to the magnitude of the correlation.
# **Note: all you would need to change to apply this to a new data frame is
# to change the name of the data frame: listed as 'df.1' (only columns 4 to 8)
# in the 'pairs' function line (last line of code). The 'panel.cor' object is
# assigned a slightly complex function for setting up our plot design; the
# 'pairs' function actually completes the plot.

# Panel function for pairs(): writes the absolute correlation of x and y in
# the centre of the panel, with a text size proportional to that correlation.
#
# Args:
#   x, y     Numeric vectors holding the panel's two variables.
#   digits   Number of significant digits used to format the correlation.
#   prefix   Text placed in front of the formatted correlation.
#   cex.cor  Base character expansion. When missing it is chosen so that the
#            text would span 0.8 of the panel width; the size actually used
#            is this value multiplied by the absolute correlation.
#   ...      Accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 3, prefix = "", cex.cor, ...) {
  # Switch the panel to unit coordinates. par() returns the previous setting
  # as a named list, which is the form par() needs in order to restore it.
  old.usr <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old.usr), add = TRUE)

  r <- abs(cor(x, y))
  # r is formatted together with a long reference number so that the common
  # format keeps the requested number of digits.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  # Scaling by r is what ties the font size to the size of the correlation.
  text(0.5, 0.5, txt, cex = cex.cor * r)
}

pairs(df.1[4:8], lower.panel = panel.smooth, upper.panel = panel.cor)

#####################

# Animated family of logistic distribution functions: the first curve has
# scale 5 and each of the 24 following frames lowers the scale by 0.2.
n <- 200
my.colors <- c("darkgreen", "darkblue")
x <- seq(-5, 5, length.out = n)
plot(x, plogis(x, location = 0, scale = 5), col = my.colors[1],
     xlim = c(-5, 5), ylim = c(0, 1), xlab = "X", ylab = "Y", pch = ".")
for (i in seq_len(24)) {
  # points() adds to the existing axes, so nothing is redrawn on top of them;
  # the color of each frame is picked at random.
  points(x, plogis(x, location = 0, scale = 5 - 0.2 * i),
         col = sample(my.colors, 1), pch = ".")
  # Pause between frames so the change is visible.
  Sys.sleep(0.5)
}
rm(i, my.colors)

######################

# Static version: six logistic distribution functions with decreasing scale
# on one set of axes, plus a straight line from (-5, 0) to (5, 1).
curve.scales <- c(1.5, 1.25, 1, 0.75, 0.5, 0.25)
curve.colors <- c("yellow", "orange", "red", "darkgreen", "blue", "purple")
plot(x, plogis(x, location = 0, scale = curve.scales[1]),
     col = curve.colors[1], xlim = c(-5, 5), ylim = c(0, 1),
     ylab = "plogis(x)")
for (k in seq_along(curve.scales)[-1]) {
  points(x, plogis(x, location = 0, scale = curve.scales[k]),
         col = curve.colors[k])
}
lines(x = c(-5, 5), y = c(0, 1.0), lwd = 2)
rm(n, x, k, curve.scales, curve.colors)

######################

# Basic boxplot (changing the background to lightgray).
par(bg = "lightgray")
boxplot(y ~ sex, data = df.1, ylab = "y", col = "lightgreen")
boxplot(y ~ level, data = df.1, ylab = "y", col = "green")

# Basic histogram.
with(df.1, hist(y, col = "lightblue1", main = "Histogram of y"))

# Basic kernel density function (kdf) plotted.
with(df.1, plot(density(y), col = "blue"))

# Combining the histogram and kdf. The histogram is drawn on the density
# scale (freq = FALSE) so the two share a vertical axis.
with(df.1, {
  hist(y, col = "lightblue1", main = "Histogram of y", xlim = range(y),
       freq = FALSE)
  lines(density(y), col = "blue")
})

# Combining a count histogram with a normal curve (black) and the kernel
# density (blue). Both curves are densities, so they are multiplied by
# bin width * number of observations to put them on the count scale.
with(df.1, {
  h <- hist(x1, breaks = 10, col = "lightblue1")
  to.counts <- diff(h$mids[1:2]) * length(x1)
  xfit <- seq(min(x1), max(x1), length.out = 40)
  yfit <- dnorm(xfit, mean = mean(x1), sd = sd(x1)) * to.counts
  lines(xfit, yfit, col = "black", lwd = 2)
  kde <- density(x1)
  lines(kde$x, kde$y * to.counts, col = "blue", lwd = 2)
})

# Another way to achieve a similar overlay; here, a standard normal curve is
# overlaid on the accurate densities:
with(df.1, {
  hist(x3, freq = FALSE, breaks = 8, col = "lightblue1")
  lines(density(x3), lwd = 2, col = "blue")
})
# The curve is drawn by a fresh plot() over x from -5 to 5 without axes, so
# it uses its own coordinate system rather than the histogram's.
par(new = TRUE)
temp <- seq(-5, 5, length.out = 100)
plot(temp, dnorm(temp), type = "l", col = "black", lwd = 1,
     xlab = "", ylab = "", axes = FALSE)
rm(temp)

# Restore the background to white:
par(bg = "white")

### The 'car' package contains the 'scatterplot' function.
# The scatterplot function (& scatterplotMatrix function) has some amazing
# flexibility.
help(scatterplot)

# Smoother lines and lowess lines are good for checking for non-linearity.
with(df.1, scatterplot(x1, y))

# Ellipses are good for checking correlation (i.e. thin ellipse = high
# correlation).
# NOTE(review): newer 'car' releases appear to take the span through the
# 'smooth' argument rather than 'span' - confirm against the installed version.
with(df.1, scatterplot(x1, y, ellipse = TRUE, span = 0.5))

# Multiple variable matrices can quickly become difficult to interpret.
# Scatterplot matrices for columns 4 to 8 of df.1, then for columns 5, 6
# and 8 only, followed by the correlations of that smaller set.
# NOTE(review): newer 'car' releases appear to take the span through the
# 'smooth' argument rather than 'span' - confirm against the installed version.
scatterplotMatrix(df.1[4:8], smooth = TRUE, ellipse = TRUE, span = 0.5)
scatterplotMatrix(df.1[c(5, 6, 8)], ellipse = TRUE, span = 0.5)
print(cor(df.1[c(5, 6, 8)]))

## The 'lattice' package contains a variety of choices for
## producing plots & graphs specialized for multivariate data.
# See the book:
# Sarkar, D. (2008). Lattice: Multivariate Data Visualization with R.
# New York: Springer Science+Business Media, LLC.
library(lattice)

# Lattice functions return an object that is drawn only when it is printed.
# The explicit print() calls make every plot appear when the script is run
# through source() as well as when it is typed at the prompt. Every call
# names its data frame, so none of them depends on attach().

# Histograms of x1 conditioned on a grouping variable.
print(histogram(~ x1 | sex, data = df.1))
print(histogram(~ x1 | level, data = df.1, col = "lightblue1"))

# Multiple groups' density plots overlaid.
print(densityplot(~ x1, data = df.1, groups = level, plot.points = FALSE,
                  ref = TRUE, auto.key = list(columns = 2)))

# Multiple groups' bivariate scatterplots.
print(xyplot(y ~ x1 | sex + level, data = df.1))

# Q-Q plots.
print(qqmath(~ y | factor(level), data = df.1, f.value = ppoints(100)))
print(qq(sex ~ y | factor(level), data = df.1, f.value = ppoints(100),
         aspect = 1))

# Boxplots (i.e. Box-and-Whisker plots). The horizontal axis carries sex, so
# it is labelled "Gender" in both plots.
print(bwplot(y ~ sex, data = df.1, xlab = "Gender"))
print(bwplot(y ~ sex | level, data = df.1, xlab = "Gender", layout = c(4, 1)))

# Violin plots (compared with the normal boxplot).
print(bwplot(y ~ level, data = df.1, ylim = c(10, 30)))
print(bwplot(y ~ level, data = df.1, panel = panel.violin, ylim = c(10, 30),
             col = "lightblue2"))

# There is a great deal more one can do with the lattice package, see the
# book mentioned above.
# As I come across interesting / useful ways for plotting data I'll add them
# here.
# Originally posted: Jan. 2011
# Last updated: Feb 25, 2011