#
#
########## VERY SIMPLE Non-Linear Regression examples ##########
#
# Attempting to mirror the Model Misspecification Benchmarks article I did previously
# using SPSS. However, the exponential model does not fit properly due to the use of
# transformation(s) during the fit process in SPSS--therefore, the exponential model
# in R does not match SPSS.
# Take a look at the article:
# http://web3.unt.edu/benchmarks/issues/2010/04/rss-matters
library(foreign)
library(Rcmdr)
# Construct the example data: x runs 0.1 .. 3.0 in steps of 0.1.
# (1:30) / 10 produces exactly the same doubles as the decimal literals,
# because IEEE division and decimal-literal parsing are both correctly rounded.
x <- (1:30) / 10
# Observed responses, decaying sharply then flattening out as x grows.
y <- c(1.900, 1.620, 1.370, 1.060, 0.720, 0.480, 0.350, 0.280, 0.260, 0.240,
       0.230, 0.200, 0.180, 0.170, 0.150, 0.143, 0.130, 0.110, 0.100, 0.090,
       0.080, 0.077, 0.071, 0.068, 0.063, 0.050, 0.041, 0.036, 0.022, 0.019)
# Scatterplot of the raw data on a fixed window so later fits overlay cleanly.
plot(x, y, xlim = c(-0.5, 3), ylim = c(-0.5, 3))
# Ordinary least-squares fit: the diagnostics 'look' fine, but a straight
# line is the wrong functional form for this clearly curved relationship.
reg1 <- lm(y ~ x)
summary(reg1)
# Robust linear regression via the 'robust' package: resistant to the
# influence of the early high-leverage points, but still only a straight line.
library(robust)
rob.reg <- lmRob(formula = y ~ x)
summary(rob.reg)
####### These (below) are NOT non-linear regressions....one of the few examples where Rcmdr can lead
####### one astray.
#
# Using Rcmdr --> Statistics --> Fit Model --> Linear Model.
# HOWEVER, they are NOT really doing what we want (e.g. quadratic regression, cubic
# regression) even though we appear to write polynomial model equations. Inside an
# R model formula, '^' is the factor-crossing operator, NOT arithmetic
# exponentiation, so for a single numeric predictor x^2 (and x^3) simply expand
# to x. Both fits below are therefore plain straight-line regressions of y on x;
# true polynomial terms in lm() would require I(x^2) or poly(x, 2).
# A "quadratic" regression (actually just y ~ x -- see note above).
LinearModel.1 <- lm(y ~ x^2 + x)
summary(LinearModel.1)
# A "cubic" regression (also actually just y ~ x).
LinearModel.2 <- lm(y ~ x^3 + x^2 + x)
summary(LinearModel.2)
########## REAL Non-Linear Regression ##########
# To really fit those models, load the nls2 package and use nls() (base stats)
# or nls2(), an extended version of Non-linear Least Squares.
#
# When using these functions you need to know the formula of the model you are
# fitting, and you must supply starting values for every coefficient as a
# named list. The values here are fairly arbitrary; in practice they may be
# driven by theory or by scale constraints. If the start list is left out,
# the routine makes a 'best guess' which may not lead to convergence.
library(nls2)
# Naming convention below: nlm1 = Non-Linear Model 1, and so on.
#
# Quadratic regression (matches the SPSS coefficients from the article).
quad.start <- list(b1 = 0.765, b2 = 0.543, a = 0.123)
nlm1 <- nls(y ~ b1*x^2 + b2*x + a, start = quad.start)
summary(nlm1)
# Cubic regression (matches the SPSS coefficients from the article).
cubic.start <- list(b1 = 0.765, b2 = 0.543, b3 = 0.456, a = 0.123)
nlm2 <- nls(y ~ b1*x^3 + b2*x^2 + b3*x + a, start = cubic.start)
summary(nlm2)
###
# The 'predict' function returns the fitted values for a model object.
# For the lm fit, se.fit = TRUE additionally returns standard errors.
predict(reg1, se.fit = TRUE)
# NOTE(review): per ?predict.nls, predict() on an nls object ignores se.fit
# and interval arguments and returns only the fitted values.
predict(nlm1)
###### Neither of the exponential fits below matches the SPSS version, because SPSS
# applies transformation(s) to avoid the non-solvable derivatives, which
# necessitates the use of different algorithms in R.
# Both fits share the same starting values.
exp.start <- list(b1 = 0.765, a = 0.123)
# Functional exponential model using the 'port' algorithm.
nlm3.2 <- nls(y ~ b1^x + a, start = exp.start, algorithm = 'port')
summary(nlm3.2)
# Functional exponential model with the 'brute-force' algorithm of nls2;
# this simply returns the start values as the coefficients.
nlm3.3 <- nls2(y ~ b1^x + a, start = exp.start, algorithm = 'brute-force')
summary(nlm3.3)
# The original version (default Gauss-Newton algorithm) does not work: nls()
# stops with an error because the derivatives cannot be solved. Wrapping the
# call in try() keeps the failure visible (the error message still prints)
# without aborting the rest of the script when it is source()d; on failure
# nlm3 holds a "try-error" object, so summary() is guarded accordingly.
nlm3 <- try(nls(y ~ b1^x + a, start = list(b1 = 0.765, a = 0.123)))
if (!inherits(nlm3, "try-error")) summary(nlm3)
# However, if we place a negative before the b-weight (parsed as -(b1^x),
# since unary minus binds more loosely than '^'), Gauss-Newton converges to
# a solution -- though it still will not match the SPSS version.
nlm3.4 <- nls(y ~ -b1^x + a, start = list(b1 = 0.765, a = 0.123))
summary(nlm3.4)
########## Loess Models & Plots ##########
plot(x, y, col = "red", xlim=c(-.5,3), ylim=c(-.5,3))
lines(lowess(x, y, f=.2), lwd=1, col="black")
# Fitting lowess again; slightly different plotted points result here.
# NOTE(review): the three calls below are lowess(y, x, ...) -- the arguments
# are swapped relative to lowess(x, y, ...) above, so they smooth x as a
# function of y. That reversal (not merely the different f values) appears to
# be why, after running 'low1', the new points are not quite on top of the
# earlier ones -- confirm whether the swap was intentional.
low1 <- lowess(y, x, f = 0.5)
plot(low1, col = "purple", xlim=c(-.5,3), ylim=c(-.5,3))
lines(lowess(y, x, f=.6), lwd=1, col="black")
low1 # the new (smoothed) values which were plotted in purple.
summary(low1)
# Lowess via loess(): these blue points sit directly on top of the original
# red ones. par(new = TRUE) overlays the second plot on the first.
low2 <- loess(y ~ x, span = 0.6, degree = 1)
plot(x, y, col = "red", xlim = c(-0.5, 3), ylim = c(-0.5, 3))
par(new = TRUE)
plot(low2, col = "blue", xlim = c(-0.5, 3), ylim = c(-0.5, 3), xlab = "x", ylab = "y")
lines(lowess(y, x, f = 0.8), lwd = 1, col = "dark blue")
low2
summary(low2)
# Local regression with the locfit package; perhaps the best fit line yet.
library(locfit)
low3 <- locfit(y ~ x)
par(new = TRUE)
plot(low3, get.data = TRUE, col = "red", xlim = c(-0.5, 3), ylim = c(-0.5, 3),
     xlab = "x", ylab = "y", cex = 2)
low3
summary(low3)
### Nonparametric regression from the sm package.
library(sm)
low4 <- sm.regression(x, y, col = "red", xlim = c(-0.5, 3), ylim = c(-0.5, 3),
                      xlab = "x", ylab = "y", cex = 2, h = 0.3,
                      model = "linear")
summary(low4)
low4