## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE ) library(riskdiff) ## ----basic_example------------------------------------------------------------ # Load the data data(cachar_sample) # Quick look at the data head(cachar_sample) table(cachar_sample$areca_nut, cachar_sample$abnormal_screen) ## ----calculate_weights-------------------------------------------------------- # Calculate ATE weights for areca nut use iptw_result <- calc_iptw_weights( data = cachar_sample, treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking", "tobacco_chewing"), weight_type = "ATE", verbose = TRUE ) # Examine the results print(iptw_result) ## ----check_assumptions-------------------------------------------------------- # Comprehensive assumption checking assumptions <- check_iptw_assumptions(iptw_result, verbose = TRUE) ## ----balance_plots, fig.width=8, fig.height=6--------------------------------- # Create balance plots (requires ggplot2) if (requireNamespace("ggplot2", quietly = TRUE)) { plots <- create_balance_plots(iptw_result, plot_type = "both") print(plots$love_plot) print(plots$ps_plot) } ## ----causal_effect------------------------------------------------------------ # Estimate ATE using IPTW rd_causal <- calc_risk_diff_iptw( data = cachar_sample, outcome = "abnormal_screen", treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking", "tobacco_chewing"), weight_type = "ATE", verbose = TRUE ) print(rd_causal) summary(rd_causal) ## ----ate_example-------------------------------------------------------------- rd_ate <- calc_risk_diff_iptw( data = cachar_sample, outcome = "abnormal_screen", treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking"), weight_type = "ATE" ) cat("ATE: The average causal effect of areca nut use in the population\n") cat("Risk Difference:", scales::percent(rd_ate$rd_iptw, accuracy = 0.01), "\n") ## ----att_example-------------------------------------------------------------- rd_att <- calc_risk_diff_iptw( data = cachar_sample, outcome = "abnormal_screen", treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking"), weight_type = "ATT" ) cat("ATT: The average causal effect among areca nut users\n") cat("Risk Difference:", scales::percent(rd_att$rd_iptw, accuracy = 0.01), "\n") ## ----atc_example-------------------------------------------------------------- rd_atc <- calc_risk_diff_iptw( data = cachar_sample, outcome = "abnormal_screen", treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking"), weight_type = "ATC" ) cat("ATC: The average causal effect among non-users of areca nut\n") cat("Risk Difference:", scales::percent(rd_atc$rd_iptw, accuracy = 0.01), "\n") ## ----bootstrap_example-------------------------------------------------------- rd_bootstrap <- calc_risk_diff_iptw( data = cachar_sample, outcome = "head_neck_abnormal", treatment = "tobacco_chewing", covariates = c("age", "sex", "residence", "areca_nut"), bootstrap_ci = TRUE, boot_n = 500, # Use more in practice (1000+) verbose = FALSE ) print(rd_bootstrap) ## ----ps_models---------------------------------------------------------------- # Logistic regression (default) ps_logit <- calc_iptw_weights( data = cachar_sample, treatment = "tobacco_chewing", covariates = c("age", "sex", "residence", "areca_nut"), method = "logistic" ) # Probit regression ps_probit <- calc_iptw_weights( data = cachar_sample, treatment = "tobacco_chewing", covariates = c("age", "sex", "residence", "areca_nut"), method = "probit" ) # Compare propensity score distributions cat("Logistic PS range:", round(range(ps_logit$ps), 3), "\n") cat("Probit PS range:", round(range(ps_probit$ps), 3), "\n") ## ----stabilization------------------------------------------------------------ # Unstabilized weights ps_unstab <- calc_iptw_weights( data = cachar_sample, treatment = "areca_nut", covariates = c("age", "sex", "residence"), stabilize = FALSE ) # Stabilized weights (default) ps_stab <- calc_iptw_weights( data = cachar_sample, treatment = "areca_nut", covariates = c("age", "sex", "residence"), stabilize = TRUE ) cat("Unstabilized weight variance:", round(var(ps_unstab$weights), 2), "\n") cat("Stabilized weight variance:", round(var(ps_stab$weights), 2), "\n") ## ----trimming----------------------------------------------------------------- # Check for extreme weights summary(ps_stab$weights) # Trim at 1st and 99th percentiles ps_trimmed <- calc_iptw_weights( data = cachar_sample, treatment = "areca_nut", covariates = c("age", "sex", "residence"), trim_weights = TRUE, trim_quantiles = c(0.01, 0.99) ) cat("Original weight range:", round(range(ps_stab$weights), 2), "\n") cat("Trimmed weight range:", round(range(ps_trimmed$weights), 2), "\n") ## ----comparison--------------------------------------------------------------- # Traditional regression-based risk difference rd_regression <- calc_risk_diff( data = cachar_sample, outcome = "abnormal_screen", exposure = "areca_nut", adjust_vars = c("age", "sex", "residence", "smoking"), link = "auto" ) # IPTW-based causal risk difference rd_iptw <- calc_risk_diff_iptw( data = cachar_sample, outcome = "abnormal_screen", treatment = "areca_nut", covariates = c("age", "sex", "residence", "smoking"), weight_type = "ATE" ) # Compare results comparison_table <- data.frame( Method = c("Regression Adjustment", "IPTW (ATE)"), Risk_Difference = scales::percent(c(rd_regression$rd, rd_iptw$rd_iptw), accuracy = 0.01), CI_Lower = scales::percent(c(rd_regression$ci_lower, rd_iptw$ci_lower), accuracy = 0.01), CI_Upper = scales::percent(c(rd_regression$ci_upper, rd_iptw$ci_upper), accuracy = 0.01), P_Value = sprintf("%.3f", c(rd_regression$p_value, rd_iptw$p_value)) ) print(comparison_table) ## ----poor_balance, eval=FALSE------------------------------------------------- # # Check which variables have poor balance # assumptions <- check_iptw_assumptions(iptw_result) # poor_balance_vars <- assumptions$balance$poor_balance_vars # # if (length(poor_balance_vars) > 0) { # cat("Variables with poor balance:", paste(poor_balance_vars, collapse = ", "), "\n") # # # Try including interactions or polynomial terms # iptw_improved <- calc_iptw_weights( # data = cachar_sample, # treatment = "areca_nut", # covariates = c("age", "I(age^2)", "sex", "residence", # "smoking", "age:sex"), # Add interactions # weight_type = "ATE" # ) # } ## ----extreme_ps, eval=FALSE--------------------------------------------------- # # Check propensity score distribution # iptw_result <- calc_iptw_weights( # data = cachar_sample, # treatment = "areca_nut", # covariates = c("age", "sex", "residence") # ) # # # Identify subjects with extreme scores # extreme_low <- which(iptw_result$ps < 0.05) # extreme_high <- which(iptw_result$ps > 0.95) # # if (length(extreme_low) > 0 || length(extreme_high) > 0) { # cat("Consider trimming sample to region of common support\n") # # # Restrict to common support # common_support <- iptw_result$ps >= 0.05 & iptw_result$ps <= 0.95 # data_restricted <- cachar_sample[common_support, ] # # # Re-analyze with restricted sample # rd_restricted <- calc_risk_diff_iptw( # data = data_restricted, # outcome = "abnormal_screen", # treatment = "areca_nut", # covariates = c("age", "sex", "residence") # ) # } ## ----model_spec, eval=FALSE--------------------------------------------------- # # Simple model # ps_simple <- calc_iptw_weights( # data = cachar_sample, # treatment = "areca_nut", # covariates = c("age", "sex") # ) # # # Complex model with interactions # ps_complex <- calc_iptw_weights( # data = cachar_sample, # treatment = "areca_nut", # covariates = c("age", "I(age^2)", "sex", "residence", # "smoking", "tobacco_chewing", "age:sex") # ) # # # Compare balance # check_iptw_assumptions(ps_simple, verbose = FALSE) # check_iptw_assumptions(ps_complex, verbose = FALSE) ## ----sensitivity, eval=FALSE-------------------------------------------------- # # Simulate an unmeasured confounder # set.seed(123) # cachar_sample$unmeasured_confounder <- rbinom(nrow(cachar_sample), 1, 0.3) # # # Compare results with and without the unmeasured confounder # rd_without_u <- calc_risk_diff_iptw( # data = cachar_sample, # outcome = "abnormal_screen", # treatment = "areca_nut", # covariates = c("age", "sex", "residence") # ) # # rd_with_u <- calc_risk_diff_iptw( # data = cachar_sample, # outcome = "abnormal_screen", # treatment = "areca_nut", # covariates = c("age", "sex", "residence", "unmeasured_confounder") # ) # # cat("Without unmeasured confounder:", scales::percent(rd_without_u$rd_iptw), "\n") # cat("With unmeasured confounder:", scales::percent(rd_with_u$rd_iptw), "\n") # cat("Difference:", scales::percent(abs(rd_without_u$rd_iptw - rd_with_u$rd_iptw)), "\n")