#
#
# Examples of estimating the parameters (a & b) of a linear
# Ordinary Least Squares (OLS) regression line.
#
#

# Seconds to wait between animation frames. The pause only matters when
# somebody is watching the graphics device, so batch runs do not sleep.
frame_pause <- if (interactive()) 1.0 else 0

####################################################
# Simple example.
# The model is y = b*x + a and the correlation of 'x' and 'y' is
# positive one (r = 1.0).

x <- c(-2, -1, 1, 2)
y <- c(-2, 0, 4, 6)

b <- -3
a <- 2

# Number of candidate slopes to show: b = -2, -1, 0, 1, 2.
n_frames <- 5

for (i in seq_len(n_frames)) {
  # The scatter plot is redrawn on every frame, so each frame shows only
  # the current candidate line.
  plot(x, y, cex = 2, col = "blue", xlim = c(-3, 3), ylim = c(-5, 10))
  b <- b + 1
  abline(a, b, col = "red", lwd = 1, lty = "dashed")
  if (i == n_frames) {
    # The last candidate (b = 2, a = 2) passes through every point, so it
    # is highlighted in green as the solution.
    abline(a, b, col = "green", lwd = 2, lty = "solid")
  }
  Sys.sleep(frame_pause)
}

####################################################
# More realistic example.
# The model is y = b*x + a + e where 'e' represents error
# (i.e. correlation is not perfect).
# 'x' and 'e' are random draws, so the data differ on every run; call
# set.seed() beforehand to reproduce a particular plot.

n <- 100
x <- rnorm(n)
b <- 1.5
a <- 3
e <- rnorm(n)
y <- b * x + a + e

mod.1 <- lm(y ~ x)
# Explicit print() so the summary is also shown when this file is run
# through source(), which does not auto-print top-level values.
print(summary(mod.1))

plot(x, y, col = "blue", xlim = c(-3, 3), ylim = c(-3, 9))
abline(reg = mod.1, col = "green", lwd = 2, lty = "dashed")

b2 <- -4

# Number of candidate slopes to overlay: b2 = -3.5, -3.0, ..., 1.0.
n_lines <- 10

for (i in seq_len(n_lines)) {
  b2 <- b2 + 0.5
  # The red lines accumulate on the same plot; all use the true intercept 'a'.
  abline(a, b2, col = "red", lwd = 1, lty = "dashed")
  Sys.sleep(frame_pause)
  if (i == n_lines) {
    # Finish by redrawing the fitted line, solid, on top of the candidates.
    abline(reg = mod.1, col = "green", lwd = 2, lty = "solid")
  }
}

# Notice the "center" (i.e. y-intercept) of the red lines is
# not exactly on the y-intercept of the green line (actual
# y-intercept). This is caused by the error term introduced
# in the data generation stage (i.e. creating a realistic
# looking set of 'y' data). Without this error term (e),
# all the blue points would be directly on the green line
# (see below).

# Here, the plot contains the sample data (with error), the
# green line represents the best fit linear (OLS) regression
# line, the red line represents the original values of the
# y-intercept (a = 3) and the coefficient of x (b = 1.5) used to
# create the values of 'y'.

plot(x, y, col = "blue", xlim = c(-3, 3), ylim = c(-3, 9))
abline(reg = mod.1, col = "green", lwd = 2)
abline(a, b, col = "red", lwd = 1)

# End: Feb 2011