## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(aggTrees)
library(grf)

## ----data-generation, eval = TRUE---------------------------------------------
## Generate data.
set.seed(1986)

n <- 500 # Sample size kept small to comply with CRAN notes.
k <- 3   # Number of covariates.

X <- matrix(rnorm(n * k), ncol = k)
colnames(X) <- paste0("x", seq_len(k))
D <- rbinom(n, size = 1, prob = 0.5)

## Conditional means under control (mu0) and treatment (mu1). Their difference,
## mu1 - mu0, equals x2, so the simulated treatment effect varies only with x2.
mu0 <- 0.5 * X[, 1]
mu1 <- 0.5 * X[, 1] + X[, 2]
Y <- mu0 + D * (mu1 - mu0) + rnorm(n)

## Sample split.
splits <- sample_split(length(Y), training_frac = 0.5)
training_idx <- splits$training_idx
honest_idx <- splits$honest_idx

## `drop = FALSE` keeps the covariates as a matrix (with column names) no
## matter how many rows are selected.
Y_tr <- Y[training_idx]
D_tr <- D[training_idx]
X_tr <- X[training_idx, , drop = FALSE]

Y_hon <- Y[honest_idx]
D_hon <- D[honest_idx]
X_hon <- X[honest_idx, , drop = FALSE]

## ----estimate-cates, eval = TRUE----------------------------------------------
## Estimate the CATEs. The forest is fit on the training sample only and then
## used to predict on both the training and the honest sample.
forest <- causal_forest(X_tr, Y_tr, D_tr)

cates_tr <- predict(forest, X_tr)$predictions
cates_hon <- predict(forest, X_hon)$predictions

## ----construct-sequence, eval = TRUE------------------------------------------
## Construct the sequence. Use doubly-robust scores (default option).
groupings <- build_aggtree(
  Y_tr, D_tr, X_tr,       # Training sample.
  Y_hon, D_hon, X_hon,    # Honest sample.
  cates_tr = cates_tr,    # Predicted CATEs, training sample.
  cates_hon = cates_hon   # Predicted CATEs, honest sample.
)

## Print.
print(groupings)

## Plot.
plot(groupings) # Try also setting 'sequence = TRUE'.

## ----inference, eval = TRUE---------------------------------------------------
## Inference with 4 groups.
results <- inference_aggtree(groupings, n_groups = 4)

## LaTeX output.
print(results, table = "diff")
print(results, table = "avg_char")