### Creating some demographic variables with 5 cities.
###
### Simulates a synthetic population spread over five cities and then draws a
### simple random sample (without replacement) from it.
###
### Input : sample.size.wanted (set just below).
### Output: `demographics`, a data frame with the columns id, city.names,
###         gender, age, education and income, sorted by id.
### Every intermediate object, including `population` and
### `sample.size.wanted`, is removed from the workspace before the script ends.
### Only base R (base + stats) is used.

sample.size.wanted <- 1000  # Insert sample size here, then run the entire script.

################################################################################

# Population sizes for each City; city.labels is in the same order.
city.sizes <- c(165321, 281683, 261789, 200144, 185635)
city.labels <- c("El Pasio", "Burkley", "Ditroit", "Bolder", "South Beech")
N1 <- sum(city.sizes)

# Fail before the expensive simulation if the requested sample cannot be drawn
# without replacement from N1 people.
stopifnot(
  is.numeric(sample.size.wanted),
  length(sample.size.wanted) == 1,
  sample.size.wanted >= 0,
  sample.size.wanted <= N1
)

id <- seq_len(N1)

# City index (1-5) for every person, plus the matching City name factor.
# People are laid out city by city, so person k of city j sits in block j.
city <- rep(seq_along(city.sizes), times = city.sizes)
city.names <- factor(rep(city.labels, times = city.sizes))

# Create a Gender (or sex) variable: a Bernoulli(.53) draw where 1 is Female.
# Levels are ordered "Female", "Male".
gender <- factor(rbinom(N1, 1, .53),
                 levels = c(1, 0),
                 labels = c("Female", "Male"))

# Create an Age variable: one rounded normal draw per person, with a
# city-specific mean and standard deviation.
age.mean <- c(32, 48, 37, 41, 39)
age.sd <- c(13, 29, 18, 22, 20)
upper <- c(105, 106, 107, 108, 109, 110, 110)  # candidate upper age cut-offs
age.z <- vector("list", length(city.sizes))
for (i in seq_along(city.sizes)) {
  fold <- age.mean[i] + age.sd[i]
  a <- round(rnorm(city.sizes[i], age.mean[i], age.sd[i]))

  # Ages of 17 or less are reflected upwards around (mean + sd).
  out <- which(a <= 17)
  a[out] <- fold - a[out]

  # Ages at or above a randomly chosen cut-off are pulled down by (mean + sd).
  up <- sample(upper, 1)
  out <- which(a >= up)
  a[out] <- a[out] - fold

  age.z[[i]] <- a
}
age <- unlist(age.z)
rm(age.z, age.mean, age.sd, upper, fold, a, out, up, i)

# Create an Education variable: standardized age plus noise, rescaled to
# mean 11 and sd 2.25, then shifted by a city-specific amount.
z.age <- (age - mean(age)) / sd(age)
ed <- .8 * z.age - rnorm(N1, 0, .5)
z.ed <- (ed - mean(ed)) / sd(ed)
ed <- (z.ed * 2.25) + 11
# Looking the shift up by city index gives every person exactly one shift.
# Overlapping index ranges would shift the last person of a city twice.
ed.shift <- c(-3.5, 1.6, -2.6, -1.5, 1.5)
ed <- ed + ed.shift[city]
low <- which(age <= 5)
ed[low] <- 0
education <- round(ed)
rm(ed, ed.shift, low, z.ed)

# Create an Income variable from age, education, city index and Beta(3, 18)
# noise.
z.edu <- (education - mean(education)) / sd(education)
z.ic <- rbeta(N1, 3, 18)
inc <- .6 * z.age + .7 * z.edu + .8 * city + .95 * z.ic
income <- (inc * 15000) + 50000
# Incomes of 15000 or less are raised by 25000.
low <- which(income <= 15000)
income[low] <- income[low] + 25000
rm(inc, low, z.age, z.edu, z.ic)

# Put it all together.
population <- data.frame(id, city.names, gender, age, education, income)
rm(id, city, city.names, gender, age, education, income,
   N1, city.sizes, city.labels)
summary(population)

# Now, we can draw a sample of size n; as specified at the top of the script.
# id equals the row number, so the sampled ids can index the rows directly.
n <- sample.size.wanted
sample.id <- sample(population$id, n, replace = FALSE)
demographics <- population[sample.id, ]
rm(n, sample.id, population, sample.size.wanted)

summary(demographics)
nrow(demographics)
head(demographics)

demographics <- demographics[order(demographics$id), ]
head(demographics)

# End of Script.