# # ### Examples of False Discovery Rate (FDR) # ################################################################################ # Each iteration below will create data for 200 cases across a grouping variable # and an outcome variable; then a t-test will be applied and the t-value and # p-value will be stored in each row of the 'results' data.frame. n <- 200 g <- factor(c("black","brown")) m <- seq(80, 120, by = 1) sd <- seq(14, 16, by = .1) results <- as.data.frame(matrix(rep(0, n*2), ncol = 2)) names(results) <- c("t.vals", "p.vals") data.list <- as.list(0) for (i in 1:n){ groups <- sort(sample(g, n, replace = T)) n1 <- length(which(groups == "black")); m1 <- sample(m, 1); sd1 <- sample(sd, 1) n2 <- length(which(groups == "brown")); m2 <- sample(m, 1); sd2 <- sample(sd, 2) outcome <- c(rnorm(n1, m1, sd1), rnorm(n2, m2, sd2)) results[i,1] <- t.test(outcome ~ groups)\$statistic results[i,2] <- t.test(outcome ~ groups)\$p.value data.list[[i]] <- data.frame(groups, outcome) }; rm(i, n, n1, n2, g, m, sd, groups, m1, m2, sd1, sd2, outcome); ls() # Take a look at the t-test results. summary(results) which(results\$p.vals > .05) which(results\$p.vals < .05) # If you'd like to take a look at the data sets in the data list object, each can # be identified using double brackets: [[i]] summary(data.list[[1]]) summary(data.list[[2]]) summary(data.list[[3]]) print("...###...") summary(data.list[[99]]) summary(data.list[[100]]) ################################################################################ # Next, load the 'fdrtool' package and apply the 'fdrtool' function. library(fdrtool) # *Note: these functions will often return a warning when there are less than 200 # tests (i.e. p-values or test statistics supplied to the functions). t.fdr <- fdrtool(x = results\$t.vals, statistic = "studentt") t.fdr dev.new() p.fdr <- fdrtool(x = results\$p.vals, statistic = "pvalue") p.fdr help(fdrtool) detach("package:fdrtool"); graphics.off() rm(t.fdr, p.fdr) ls() ################################################################################ # One could also use the 'multtest' package for doing various multiple comparison # control of familywise error rate procedures. The 'multtest' package is only # available through BioConductor. To install the 'multtest' package or for more # information / documentation, see: # http://www.bioconductor.org/packages/release/bioc/html/multtest.html # Load the package: library(multtest) # Create a vector which identifies which correction procedures you want to apply. procs <- c("Bonferroni","Holm","Hochberg","SidakSS","SidakSD","BH","BY") # Create a vector of the p-values. rawp <- results\$p.vals # Apply the 'mt.raw2adjp' function from the 'multtest' package. The function # returns adjusted p-values (among other things); in "mt.results\$adjp", the 'BH' # column lists the FDR results. mt.results <- mt.rawp2adjp(rawp,procs) mt.results detach("package:multtest"); detach("package:Biobase") rm(procs, rawp, mt.results); ls() ################################################################################ # References / Resources. # # Benjamini, Y, & Hochberg, Y. (1995). Controlling the false discovery rate: A # Practical and powerful approach to multiple testing. Journal of the Royal # Statistical Society (Series B), 57, 289 -- 300. Available at: JSTOR. # # Benjamini, Y. & Hochberg, Y. (2000). On the adaptive control of the false # discovery rate in multiple testing with independent statistics. Journal # of Educational and Behavioral Statistics, 25, 60 -- 83. Available at: JSTOR. # # Strimmer, K. (2008). A unified approach to false discovery rate estimation. BMC # Bioinformatics, 9:303 -- reprint/typo-corrections of article below. # Available at: http://www.biomedcentral.com/content/pdf/1471-2105-9-303.pdf # # Strimmer, K. (2008). fdrtool: A versatile R package for estimating local and # tail area-based false discovery rates. Bioinformatics, 24, 1461 -- 1462. # Available at: http://bioinformatics.oxfordjournals.org/content/24/12/1461.full.pdf # # End script; March 12, 2012.