# ########## VERY SIMPLE Non-Linear Regression examples ##########
#
# Attempts to mirror the Model Misspecification Benchmarks article previously
# done in SPSS.  However, the exponential model does not fit the same way:
# SPSS applies transformation(s) during its fit process, so the R exponential
# model does not match SPSS.
# Article: http://web3.unt.edu/benchmarks/issues/2010/04/rss-matters

library(foreign)
library(Rcmdr)

# Example data: 30 (x, y) pairs with a sharply decaying relationship.
x <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
       1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
       2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0)
y <- c(1.900, 1.620, 1.370, 1.060, 0.720, 0.480, 0.350, 0.280, 0.260, 0.240,
       0.230, 0.200, 0.180, 0.170, 0.150, 0.143, 0.130, 0.110, 0.100, 0.090,
       0.080, 0.077, 0.071, 0.068, 0.063, 0.050, 0.041, 0.036, 0.022, 0.019)

# Scatterplot of the raw data.
plot(x, y, xlim = c(-.5, 3), ylim = c(-.5, 3))

# Ordinary least squares (OLS) regression; all 'looks' well, but the
# relationship is clearly not linear.
reg1 <- lm(y ~ x)
summary(reg1)

# Robust linear regression for comparison.
library(robust)
rob.reg <- lmRob(y ~ x)
summary(rob.reg)

####### These (below) are NOT non-linear regressions -- one of the few places
####### where Rcmdr (Statistics --> Fit Model --> Linear Model) can lead one
####### astray.  They are NOT doing what we want (quadratic / cubic
####### regression) even though the formulas appear to specify non-linear
####### models: in an R model formula, x^2 and x^3 denote factor crossing,
####### not arithmetic powers, so these reduce to plain linear fits.

# A 'quadratic' regression (not really -- see note above).
LinearModel.1 <- lm(y ~ x^2 + x)
summary(LinearModel.1)

# A 'cubic' regression (not really).
LinearModel.2 <- lm(y ~ x^3 + x^2 + x)
summary(LinearModel.2)
########## REAL Non-Linear Regression ##########
#
# To 'really' fit these models, load the correct library (nls2) and use the
# function nls() (Non-linear Least Squares), or nls2() which is an updated
# version.  You must know the formula for the model being fit, and supply
# start values for the coefficients as a named list.  The start values here
# tend to be arbitrary, though in practice they may be driven by theory or
# scale constraints.  If the start 'list' is left out, the library makes a
# 'best guess' which may not lead to convergence.
library(nls2)

# Naming convention below: nlm1 = Non-Linear Model 1, etc.

# Apply a quadratic regression (matches the SPSS coefficients from the
# article).
nlm1 <- nls(y ~ b1*x^2 + b2*x + a,
            start = list(b1 = 0.765, b2 = 0.543, a = 0.123))
summary(nlm1)

# Apply a cubic regression (matches the SPSS coefficients from the article).
nlm2 <- nls(y ~ b1*x^3 + b2*x^2 + b3*x + a,
            start = list(b1 = 0.765, b2 = 0.543, b3 = 0.456, a = 0.123))
summary(nlm2)

### The 'predict' function returns predicted values.
predict(reg1, se.fit = TRUE)
predict(nlm1)

###### Neither exponential model below matches the SPSS version, due to
# SPSS's transformation(s) to avoid the non-solvable derivatives, which
# necessitates the use of different algorithms in R.

# Functional exponential model with the 'port' algorithm.
nlm3.2 <- nls(y ~ b1^x + a, start = list(b1 = 0.765, a = 0.123),
              algorithm = 'port')
summary(nlm3.2)

# Functional exponential model with the 'brute-force' algorithm; this simply
# returns the start values as the coefficients.
nlm3.3 <- nls2(y ~ b1^x + a, start = list(b1 = 0.765, a = 0.123),
               algorithm = 'brute-force')
summary(nlm3.3)

# The original version (default Gauss-Newton algorithm) does not work because
# the derivatives can not be solved.  Wrapped in tryCatch() so the expected
# failure is displayed without aborting the rest of the script when it is
# source()'d; nlm3 is NULL when the fit fails.
nlm3 <- tryCatch(
  nls(y ~ b1^x + a, start = list(b1 = 0.765, a = 0.123)),
  error = function(e) {
    message("nls (Gauss-Newton) failed as expected: ", conditionMessage(e))
    NULL
  }
)
summary(nlm3)

# However, if we place a negative before the b-weight (next section), it
# still will not match the SPSS version, but it will converge with a
# solution.
# Negating the b-weight lets the default algorithm converge (though it still
# does not match the SPSS version).
nlm3.4 <- nls(y ~ -b1^x + a, start = list(b1 = 0.765, a = 0.123))
summary(nlm3.4)

########## Loess Models & Plots ##########

# Raw data in red with a lowess smooth (f = .2) drawn over it.
plot(x, y, col = "red", xlim = c(-.5, 3), ylim = c(-.5, 3))
lines(lowess(x, y, f = .2), lwd = 1, col = "black")

# Fitting lowess again; slightly different plotted points result from using
# different calls to lowess in the stats package -- notice these points are
# not quite on top of the earlier ones.
# NOTE(review): lowess(y, x) puts y on the horizontal axis -- the reversed
# argument order appears to be what makes these points differ; confirm this
# is the intended demonstration.
low1 <- lowess(y, x, f = 0.5)
plot(low1, col = "purple", xlim = c(-.5, 3), ylim = c(-.5, 3))
lines(lowess(y, x, f = .6), lwd = 1, col = "black")
low1        # the new (predicted) values which were plotted in purple
summary(low1)

# loess() this time; blue points land directly on top of the original red
# ones.
low2 <- loess(y ~ x, span = 0.6, degree = 1)
plot(x, y, col = "red", xlim = c(-.5, 3), ylim = c(-.5, 3))
par(new = TRUE)
plot(low2, col = "blue", xlim = c(-.5, 3), ylim = c(-.5, 3),
     xlab = "x", ylab = "y")
lines(lowess(y, x, f = .8), lwd = 1, col = "dark blue")
low2
summary(low2)

# Lowess via locfit; perhaps the best fit line yet.
library(locfit)
low3 <- locfit(y ~ x)
par(new = TRUE)
plot(low3, get.data = TRUE, col = "red",
     xlim = c(-.5, 3), ylim = c(-.5, 3),
     xlab = "x", ylab = "y", cex = 2)
low3
summary(low3)

### Nonparametric regression in library(sm).
library(sm)
low4 <- sm.regression(x, y, col = "red",
                      xlim = c(-.5, 3), ylim = c(-.5, 3),
                      xlab = "x", ylab = "y", cex = 2,
                      h = 0.3, model = "linear")
summary(low4)
low4