## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----------------------------------------------------------------------------- survey_sim_est <- function(Y, X, n = 40, SimNo = 500, seed = 123) { if (length(Y) != length(X)) stop("Y and X must be of the same length.") set.seed(seed) N <- length(Y) Y.total <- sum(Y) X.total <- sum(X) True.Total <- rep(Y.total, SimNo) data <- data.frame(ID = 1:N, Y = Y, X = X) est1 <- est2 <- est3 <- var_est1 <- var_est2 <- var_est3 <- numeric(SimNo) for (h in 1:SimNo) { set.seed(h) sa <- sample(1:N, n, replace = FALSE) samp <- data[sa, ] di <- rep(N / n, n) y_samp <- samp$Y x_samp <- samp$X s2_y <- var(y_samp) s2_x <- var(x_samp) rho_hat <- cor(y_samp, x_samp) R_hat <- mean(y_samp) / mean(x_samp) # HT Estimator est1[h] <- sum(di * y_samp) var_est1[h] <- N^2 * ((1 / n) - (1 / N)) * s2_y # Ratio Estimator ratio1 <- di * y_samp ratio2 <- di * x_samp est2[h] <- (sum(ratio1) / sum(ratio2)) * X.total var_est2[h] <- N^2 * ((1 / n) - (1 / N)) * (s2_y + (R_hat^2 * s2_x) - (2 * R_hat * rho_hat * sqrt(s2_y * s2_x))) # Regression Estimator b_hat <- rho_hat * sqrt(s2_y) / sqrt(s2_x) est3[h] <- est1[h] + b_hat * (X.total - sum(ratio2)) var_est3[h] <- N^2 * ((1 / n) - (1 / N)) * s2_y * (1 - rho_hat^2) } # Population-level parameters pop_s2_y <- var(Y) pop_s2_x <- var(X) R_pop <- mean(Y) / mean(X) rho_pop <- cor(Y, X) var_HT <- N^2 * ((1 / n) - (1 / N)) * pop_s2_y var_ratio <- N^2 * ((1 / n) - (1 / N)) * (pop_s2_y + R_pop^2 * pop_s2_x - 2 * R_pop * rho_pop * sqrt(pop_s2_y * pop_s2_x)) var_reg <- N^2 * ((1 / n) - (1 / N)) * pop_s2_y * (1 - rho_pop^2) # Evaluation metrics RB <- 100 * colMeans(cbind(est1, est2, est3) - True.Total) / mean(True.Total) RRMSE <- 100 * sqrt(colMeans((cbind(est1, est2, est3) - True.Total)^2)) / mean(True.Total) CV <- 100 * apply(cbind(est1, est2, est3), 2, sd) / colMeans(cbind(est1, est2, est3)) skew <- apply(cbind(est1, est2, est3), 2, moments::skewness) kurt <- apply(cbind(est1, est2, est3), 2, moments::kurtosis) emp_var <- apply(cbind(est1, est2, est3), 2, var) est_var <- c(mean(var_est1), mean(var_est2), mean(var_est3)) coverage <- colMeans(abs(cbind(est1, est2, est3) - True.Total) <= 1.96 * sqrt(cbind(var_est1, var_est2, var_est3))) result <- data.frame( Est_Total = c(mean(est1), mean(est2), mean(est3)), RB = RB, RRMSE = RRMSE, Skewness = skew, Kurtosis = kurt, Coverage = coverage, PopVar = c(var_HT, var_ratio, var_reg), EmpVar = emp_var, EstVar = est_var, CV = CV ) rownames(result) <- c("HT", "Ratio", "Regression") return(round(result, 3)) } ## ----example, message=FALSE--------------------------------------------------- set.seed(123) N <- 1000 X <- rnorm(N, mean = 50, sd = 10) Y <- 3 + 1.5 * X + rnorm(N, mean = 0, sd = 10) result <- survey_sim_est(Y = Y, X = X, n = 50, SimNo = 100) print(result)