# Partial Least Squares Path Model (PLS-PM) data generation ----
#
# Simulates 20 continuous indicator variables driven by five latent factors,
# rescales each indicator to its own mean / standard deviation, adds
# id / sex / age columns, and leaves the result in the data frame `data.1`.
# The only object this script creates in the calling environment (besides
# `out_file` for the optional export at the bottom) is `data.1`.
#
# No random seed is set here; call set.seed() before running this script if a
# reproducible data set is needed.

data.1 <- local({
  # Number of subjects.
  n <- 1000

  # Item specification: one row per indicator, in the order v1 ... v20.
  #   latent      - the factor the item loads on
  #   loading     - nominal loading (between 0 and 1; here 0.6 to 0.9)
  #   target_mean - mean of the item after rescaling
  #   target_sd   - standard deviation of the item after rescaling
  # Items of one factor typically share a scale, but they do not have to.
  item_spec <- data.frame(
    latent = rep(c("f1", "f2", "f3", "f4", "f5"), times = c(2, 3, 6, 4, 5)),
    loading = c(
      0.9, 0.8,                      # f1: v1-v2
      0.7, 0.6, 0.9,                 # f2: v3-v5
      0.8, 0.7, 0.6, 0.9, 0.8, 0.7,  # f3: v6-v11
      0.6, 0.9, 0.8, 0.7,            # f4: v12-v15
      0.6, 0.9, 0.8, 0.7, 0.6        # f5: v16-v20
    ),
    target_mean = c(
      100, 50,
      25, 75, 60,
      40, 35, 55, 20, 20, 45,
      80, 50, 20, 20,
      45, 80, 50, 20, 45
    ),
    target_sd = c(
      15, 7.5,
      3.75, 11.25, 9,
      6, 5.25, 8.25, 3, 3, 6.75,
      12, 7.5, 3, 3,
      6.75, 12, 7.5, 3, 6.75
    ),
    stringsAsFactors = FALSE
  )

  # Simulate the latent traits (common factors). f1 and f2 are exogenous;
  # f3, f4 and f5 are linear combinations of earlier factors (the structural
  # part of the path model).
  # NOTE: f4 and f5 do not have unit variance (f3 does, f1 and f2 being
  # independent), so for their items the nominal loading is not the realized
  # standardized loading.
  f1 <- rnorm(n)
  f2 <- rnorm(n)
  f3 <- 0.8 * f1 + 0.6 * f2
  f4 <- 0.8 * f2 + 0.6 * f3
  f5 <- 0.8 * f3 + 0.6 * f4
  latent <- list(f1 = f1, f2 = f2, f3 = f3, f4 = f4, f5 = f5)

  # Build the n x 20 item matrix. Items are generated strictly in the order
  # v1 ... v20 so the sequence of random draws matches the one-statement-per-
  # item formulation of this script.
  items <- vapply(
    seq_len(nrow(item_spec)),
    function(j) {
      lambda <- item_spec$loading[j]
      # Raw score: loading * factor + unique (error) component.
      raw <- lambda * latent[[item_spec$latent[j]]] +
        rnorm(n, 0, sqrt(1 - lambda^2))
      # First, standardize to z-scores: z = (x - mean(x)) / sd(x).
      z <- (raw - mean(raw)) / sd(raw)
      # Second, put the item on its target scale: (z * sd) + mean.
      (z * item_spec$target_sd[j]) + item_spec$target_mean[j]
    },
    numeric(n)
  )
  colnames(items) <- paste0("v", seq_len(ncol(items)))

  # Misc. demographic and identification variables (drawn after the items).
  id <- seq_len(n)
  sex <- sample(c(1, 2), n, replace = TRUE)
  age <- round(rnorm(n, 32, 4))

  # Combine everything into one data frame: id, sex, age, v1 ... v20.
  data.frame(id = id, sex = sex, age = age, items)
})

# If desired, write the data out to a working directory (note the data used
# in the PLS tutorial is available on the web and accessed in the tutorial
# script). The export is skipped with a warning when the target directory
# does not exist, so the script still completes on other machines.
out_file <-
  "C:/Users/jds0282/Desktop/Workstuff/Jon_R/Example Data/PLSdata001.txt"
if (dir.exists(dirname(out_file))) {
  write.table(data.1, out_file,
              sep = ",", col.names = TRUE, row.names = FALSE,
              quote = TRUE, na = "NA")
} else {
  warning("Output directory not found; data.1 was not written to ",
          out_file, call. = FALSE)
}