params <- list(EVAL = TRUE) ## ----SETTINGS-knitr, include=FALSE-------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, eval = if (isTRUE(exists("params"))) params$EVAL else FALSE, dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ----stancode----------------------------------------------------------------- # Note: some syntax used in this Stan program requires RStan >= 2.26 (or CmdStanR) # To use an older version of RStan change the line declaring `y` to: int y[N]; stancode <- " data { int K; int N; matrix[N,K] x; array[N] int y; vector[N] offset; real beta_prior_scale; real alpha_prior_scale; } parameters { vector[K] beta; real intercept; } model { y ~ poisson(exp(x * beta + intercept + offset)); beta ~ normal(0,beta_prior_scale); intercept ~ normal(0,alpha_prior_scale); } generated quantities { vector[N] log_lik; for (n in 1:N) log_lik[n] = poisson_lpmf(y[n] | exp(x[n] * beta + intercept + offset[n])); } " ## ----setup, message=FALSE----------------------------------------------------- library("rstan") library("loo") seed <- 9547 set.seed(seed) ## ----modelfit-holdout, message=FALSE------------------------------------------ # Prepare data data(roaches, package = "rstanarm") roaches$roach1 <- sqrt(roaches$roach1) roaches$offset <- log(roaches[,"exposure2"]) # 20% of the data goes to the test set: roaches$test <- 0 roaches$test[sample(.2 * seq_len(nrow(roaches)))] <- 1 # data to "train" the model data_train <- list(y = roaches$y[roaches$test == 0], x = as.matrix(roaches[roaches$test == 0, c("roach1", "treatment", "senior")]), N = nrow(roaches[roaches$test == 0,]), K = 3, offset = roaches$offset[roaches$test == 0], beta_prior_scale = 2.5, alpha_prior_scale = 5.0 ) # data to "test" the model data_test <- list(y = roaches$y[roaches$test == 1], x = as.matrix(roaches[roaches$test == 1, c("roach1", "treatment", "senior")]), N = nrow(roaches[roaches$test == 1,]), K = 3, offset = roaches$offset[roaches$test == 1], beta_prior_scale = 2.5, alpha_prior_scale = 5.0 ) ## ----fit-train---------------------------------------------------------------- # Compile stanmodel <- stan_model(model_code = stancode) # Fit model fit <- sampling(stanmodel, data = data_train, seed = seed, refresh = 0) ## ----gen-test----------------------------------------------------------------- gen_test <- gqs(stanmodel, draws = as.matrix(fit), data= data_test) log_pd <- extract_log_lik(gen_test) ## ----elpd-holdout------------------------------------------------------------- (elpd_holdout <- elpd(log_pd)) ## ----prepare-folds, message=FALSE--------------------------------------------- # Prepare data roaches$fold <- kfold_split_random(K = 10, N = nrow(roaches)) ## ----------------------------------------------------------------------------- # Prepare a matrix with the number of post-warmup iterations by number of observations: log_pd_kfold <- matrix(nrow = 4000, ncol = nrow(roaches)) # Loop over the folds for(k in 1:10){ data_train <- list(y = roaches$y[roaches$fold != k], x = as.matrix(roaches[roaches$fold != k, c("roach1", "treatment", "senior")]), N = nrow(roaches[roaches$fold != k,]), K = 3, offset = roaches$offset[roaches$fold != k], beta_prior_scale = 2.5, alpha_prior_scale = 5.0 ) data_test <- list(y = roaches$y[roaches$fold == k], x = as.matrix(roaches[roaches$fold == k, c("roach1", "treatment", "senior")]), N = nrow(roaches[roaches$fold == k,]), K = 3, offset = roaches$offset[roaches$fold == k], beta_prior_scale = 2.5, alpha_prior_scale = 5.0 ) fit <- sampling(stanmodel, data = data_train, seed = seed, refresh = 0) gen_test <- gqs(stanmodel, draws = as.matrix(fit), data= data_test) log_pd_kfold[, roaches$fold == k] <- extract_log_lik(gen_test) } ## ----elpd-kfold--------------------------------------------------------------- (elpd_kfold <- elpd(log_pd_kfold))