#
#
# Examples of estimating the parameters (a & b) of a linear Ordinary Least Squares (OLS) regression line.
#
#
####################################################
# Simple example.
# The model is y = b*x + a and the correlation of 'x' and 'y' is positive one (r = 1.0)
x <- c(-2, -1, 1, 2)
y <- c(-2, 0, 4, 6)
a <- 2
# Candidate slopes shown one per frame. The last one (2) is the slope
# that generated 'y' (y = 2*x + 2), so the final line hits every point.
slopes <- c(-2, -1, 0, 1, 2)
for (i in seq_along(slopes)) {
  b <- slopes[i]
  # The scatter plot is redrawn every frame, so only the current
  # candidate line is visible at any time.
  plot(x, y, cex = 2, col = "blue", xlim = c(-3, 3), ylim = c(-5, 10))
  abline(a = a, b = b, col = "red", lwd = 1, lty = "dashed")
  if (i == length(slopes)) {
    # Last frame: overlay the matching line in solid green.
    abline(a = a, b = b, col = "green", lwd = 2, lty = "solid")
  }
  Sys.sleep(1.0)
}
####################################################
# More realistic example.
# The model is y = b*x + a + e where 'e' represents error (i.e. correlation is not perfect).
# Simulate 'n' observations: 'x' and the error 'e' are standard-normal
# draws, and 'y' is built from the known intercept 'a' and slope 'b'.
n <- 100
x <- rnorm(n)
b <- 1.5
a <- 3
e <- rnorm(n)
y <- b*x + a + e
# Fit the OLS regression of 'y' on 'x'.
mod.1 <- lm(y ~ x)
# Explicit print(): a bare summary() call at top level displays nothing
# when this script is run with source() (without echo = TRUE).
print(summary(mod.1))
plot(x, y, col = "blue", xlim = c(-3, 3), ylim = c(-3, 9))
abline(reg = mod.1, col = "green", lwd = 2, lty = "dashed")
# Sweep candidate slopes -3.5, -3.0, ..., 1.0, all through the
# generating intercept 'a', adding one red line per second to the plot.
b2 <- -4
for (i in 1:10) {
  b2 <- b2 + .5
  abline(a, b2, col = "red", lwd = 1, lty = "dashed")
  Sys.sleep(1.0)
}
# Once the sweep is done, redraw the fitted line solid so it sits on
# top of the red candidate lines.
abline(reg = mod.1, col = "green", lwd = 2, lty = "solid")
# Notice the "center" of the red lines (i.e. their shared
# y-intercept) is not exactly on the y-intercept of the green line
# (the y-intercept estimated from the data). This is caused by the
# error term introduced in the data generation stage (i.e. creating
# a realistic looking set of 'y' data). Without this error term (e),
# all the blue points would be directly on the green line
# (see below).
# Final comparison plot:
#   - blue points: the sample data (with error);
#   - green line:  the best fit linear (OLS) regression line;
#   - red line:    the line defined by the original values of the
#                  y-intercept (a = 3) and the coefficient of x
#                  (b = 1.5) used to create the values of 'y'.
plot(x = x, y = y, col = "blue", xlim = c(-3, 3), ylim = c(-3, 9))
abline(coef = coef(mod.1), col = "green", lwd = 2)
abline(a = a, b = b, col = "red", lwd = 1)
# End: Feb 2011