# # ###### The Germany Tank Problem ###### # # # http://en.wikipedia.org/wiki/German_tank_problem # # Minimum-Variance Unbiased Estimator (MVUE) # n = m(1 + (1/k)) - 1 # where m is the largest serial number observed (sample maximum) and k is the number of # tanks observed (sample size). Note that once a serial number has been observed, # it is no longer in the pool and will not be observed again. # In order to explore the MVUE equation's accuracy, first create a population of sequential numbers. pop.N <- round(rnorm(1,100,15)) pop <- seq(1:pop.N) # Next create a sample (of size 25 here) of that population. x <- sort(sample(pop, size = 25, replace = FALSE, prob = NULL)) # Next, assign the values to 'm' and 'k' based on the sample. m <- x[25] k <- 25 # Next, apply the MVUE equation to 'estimate' the population size, N. est.n = (m*(1 + (1/k))) - 1 # Compare the estimated population size (n) to the actual population # size (pop.N). est.n pop.N # Difference or error of prediction. est.n - pop.N #################################################################### # Putting the above steps together into a simulation. # Iterated steps to check the accuracy of estimation; # while saving out the values of 'pop.N' and 'est.n' into # the vectors 'sim.pop.N' and 'sim.est.n' respectively. sim.est.n <- as.vector(0) sim.pop.N <- as.vector(0) plot(est.n,pop.N, xlim=c(50,150),ylim=c(50,150)) for(i in 1:1000){ pop.N <- round(rnorm(1,100,15)) pop <- seq(1:pop.N) x <- sort(sample(pop, size = 25, replace = FALSE, prob = NULL)) m <- x[25] k <- 25 est.n = (m*(1 + (1/k))) - 1 par(new = T) plot(est.n,pop.N, xlim=c(50,150),ylim=c(50,150)) sim.pop.N[i] <- pop.N sim.est.n[i] <- est.n } # Check the correlation between the 'real' population size (sim.pop.n) and the # 'estimated' population size (sim.N); as a proxy measure of accuracy. cor(sim.est.n, sim.pop.N) cor(sim.est.n, sim.pop.N)^2 err <- sim.est.n - sim.pop.N summary(err) hist(err) # Create a more detailed scatterplot of the simulates. library(car) scatterplot(sim.est.n, sim.pop.N, ellipse = TRUE) hist(sim.est.n) plot(density(sim.est.n), main = "", xlab = "sim.est.n", ylab = "Density") hist(sim.pop.N) plot(density(sim.pop.N), main = "", xlab = "sim.pop.N", ylab = "Density") df.1 <- data.frame(sim.est.n, sim.pop.N, err) summary(df.1) head(df.1, 25) # END: Jan. 2011.