# # # ### Creating some demographic variables with 2 REGIONS, and 10 CITIES. # # sample.size.wanted <- 1000 # Insert sample size here, then run the entire script. ################################################################################ # <<< REGION 1 >>> # # Population sizes for each City. n1 <- 165321 n2 <- 281683 n3 <- 261789 n4 <- 200144 n5 <- 185635 N1 <- n1 + n2 + n3 + n4 + n5 sta1 <- n1 sta2 <- sta1 + n2 sta3 <- sta2 + n3 sta4 <- sta3 + n4 sta5 <- sta4 + n5 # Create a Region case/row idenifier variable. region.id <- seq(1:N1) # Create a City identifier varaible and a City name factor. city <- c(rep(1, n1), rep(2, n2), rep(3, n3), rep(4, n4), rep(5, n5)) city.names <- factor(c(rep("El Pasio", n1), rep("Burkley", n2), rep("Ditroit", n3), rep("Bolder", n4), rep("South Beech", n5))) # Create a Gender (or sex) variable. library(car) gender <- factor(rbinom(N1, 1, .53)) gender <- recode(gender, "1 = 'Female'; 0 = 'Male'") # Create an Age variable. age.z <- as.list(0) for (i in 1:5){ n <- c(n1, n2, n3, n4, n5) m <- c(32, 48, 37, 41, 39) sd <- c(13, 29, 18, 22, 20) a <- round(rnorm(n[i], m[i], sd[i])) out <- which(a <= 17) a[out] <- (m[i] + sd[i]) - a[out] upper <- c(105,106,107,108,109,110,110) up <- sample(upper, 1) out <- which(a >= up) a[out] <- a[out] - (m[i] + sd[i]) age.z[[i]] <- a rm(a,out,upper,up) } age <- c(age.z[[1]], age.z[[2]], age.z[[3]], age.z[[4]], age.z[[5]]) rm(age.z, i, n, m, sd) # Create an Education variable. z.age <- (age - mean(age))/sd(age) ed <- .8*z.age - rnorm(N1, 0, .5) z.ed <- (ed - mean(ed))/sd(ed) ed <- (z.ed*2.25) + 11 ed[1:sta1] <- ed[1:sta1] - 3.5 ed[sta1:sta2] <- ed[sta1:sta2] + 1.6 ed[sta2:sta3] <- ed[sta2:sta3] - 2.6 ed[sta3:sta4] <- ed[sta3:sta4] - 1.5 ed[sta4:sta5] <- ed[sta4:sta5] + 1.5 low <- which(age <= 5) ed[low] <- 0 education <- round(ed) rm(ed, low, z.ed) # Create an Income variable. z.edu <- (education - mean(education))/sd(education) z.ic <- rbeta(N1, 3, 18) inc <- .6*z.age + .7*z.edu + .8*city + .95*z.ic income <- (inc * 15000) + 50000 low <- which(income <= 15000) income[low] <- income[low] + 25000 rm(inc, low, z.age, z.edu, z.ic) # Put it all together. region.1 <- data.frame(region.id, city, city.names, gender, age, education, income) rm(region.id, city, city.names, gender, age, education, income, n1, n2, n3, n4, n5, sta1, sta2, sta3, sta4, sta5) detach("package:car") detach("package:MASS") detach("package:nnet") ################################################################################ # <<< REGION 2 >>> # # Population city sizes. n1 <- 331213 n2 <- 119115 n3 <- 262982 n4 <- 208317 n5 <- 95335 N2 <- n1 + n2 + n3 + n4 + n5 sta1 <- n1 sta2 <- sta1 + n2 sta3 <- sta2 + n3 sta4 <- sta3 + n4 sta5 <- sta4 + n5 # Create a Region case/row idenifier variable. region.id <- seq(1:N2) # Create a City identifier varaible and a City factor. city <- c(rep(6, n1), rep(7, n2), rep(8, n3), rep(9, n4), rep(10, n5)) city.names <- factor(c(rep("Whatts", n1), rep("Garvard", n2), rep("Pittsburger", n3), rep("Spingfield", n4), rep("VallStreet", n5))) # Create Gender variables. gender1 <- rbinom(n1, 1, .48) gender2 <- rbinom(n2, 1, .45) gender3 <- rbinom(n3, 1, .49) gender4 <- rbinom(n4, 1, .54) gender5 <- rbinom(n5, 1, .37) # Create an Age variable. age1 <- rnorm(n1) age1 <- round(18 + (age1*5)) low <- which(age1 < 18) age1[low] <- age1[low] + 27 age2 <- rnorm(n2) age2 <- round(30 + (age2*3)) low <- which(age2 < 18) age2[low] <- age2[low] + 30 age3 <- rnorm(n3) age3 <- round(35 + (age3*8)) low <- which(age3 < 18) age3[low] <- age3[low] + 35 age4 <- rnorm(n4) age4 <- round(40 + (age4*9)) low <- which(age4 < 18) age4[low] <- age4[low] + 40 age5 <- rnorm(n5) age5 <- round(32 + (age5*4)) low <- which(age5 < 18) age5[low] <- age5[low] + 28 rm(low) # Create an Education variable. z.age1 <- (age1 - mean(age1))/sd(age1) z.age2 <- (age2 - mean(age2))/sd(age2) z.age3 <- (age3 - mean(age3))/sd(age3) z.age4 <- (age4 - mean(age4))/sd(age4) z.age5 <- (age5 - mean(age5))/sd(age5) zed1 <- (.7*z.age1) + rnorm(n1, 0, 1.0) zed2 <- (.9*z.age2) + rnorm(n2, 0, 1.6) zed3 <- (.7*z.age3) + rnorm(n3, 0, 1.5) zed4 <- (.8*z.age4) + rnorm(n4, 0, 2) zed5 <- (.8*z.age5) + rbinom(n5, 6, .45) ed1 <- round(zed1 + 9) ed2 <- round(zed2 + 18) ed3 <- round(zed3 + 10) ed4 <- round(zed4 + 12) ed5 <- round(zed5 + 14) rm(zed1, zed2, zed3, zed4, zed5) # Create an Income variable. z.edu1 <- (ed1 - mean(ed1))/sd(ed1) z.ic <- rbeta(n1, 3, 18) inc <- .6*z.age1 + .7*z.edu1 + .95*z.ic income1 <- (inc * 2000) + 20000 z.edu2 <- (ed2 - mean(ed2))/sd(ed2) z.ic <- rbeta(n2, 3, 18) inc <- .6*z.age2 + .9*z.edu2 + .95*z.ic income2 <- (inc * 5000) + 50000 z.edu3 <- (ed3 - mean(ed3))/sd(ed3) z.ic <- rbeta(n3, 3, 18) inc <- .8*z.age3 + .6*z.edu3 + .95*z.ic income3 <- (inc * 4000) + 40000 z.edu4 <- (ed4 - mean(ed4))/sd(ed4) z.ic <- rbeta(n4, 8, 40) inc <- .7*z.age4 + .8*z.edu4 + .95*z.ic income4 <- (inc * 6500) + 60000 z.edu5 <- (ed5 - mean(ed5))/sd(ed5) z.ic <- rbeta(n5, 3, 18) inc <- .5*z.age5 + .7*z.edu5 + .95*z.ic income5 <- (inc * 5000) + 95000 rm(inc, z.age1, z.age2, z.age3, z.age4, z.age5, z.edu1, z.edu2, z.edu3, z.edu4, z.edu5, z.ic) # Putting it all together. library(car) gender <- factor(c(gender1, gender2, gender3, gender4, gender5)) gender <- recode(gender, "1 = 'Female'; 0 = 'Male'") rm(gender1, gender2, gender3, gender4, gender5) age <- c(age1, age2, age3, age4, age5) rm(age1, age2, age3, age4, age5) education <- c(ed1, ed2, ed3, ed4, ed5) rm(ed1, ed2, ed3, ed4, ed5) income <- c(income1, income2, income3, income4, income5) rm(income1, income2, income3, income4, income5) region.2 <- data.frame(region.id, city, city.names, gender, age, education, income) rm(region.id, city, city.names, gender, age, education, income, n1, n2, n3, n4, n5, sta1, sta2, sta3, sta4, sta5) detach("package:car") detach("package:MASS") detach("package:nnet") ##### Putting the two Regions together to form the Population. N <- N1 + N2 pop.id <- seq(1:N) region <- factor(c(rep("I", N1), rep("II", N2))) population <- data.frame(pop.id, region, rbind(region.1, region.2)) rm(N, N1, N2, pop.id, region, region.1, region.2); ls() summary(population) ################################################################################ ##### Draw a sample of size n; as specified up at the top of the script. n <- sample.size.wanted s.id <- sample(population[,1], n, replace = F) sample <- population[s.id,-c(3:4)]; rm(n, s.id, sample.size.wanted) names(sample)[1] <- "id" summary(sample) nrow(sample) head(sample) sample <- sample[order(sample$id),] head(sample) ls() # End of Script.