--- title: "Bayesian Quantification of Evidence Sufficiency" author: "Dylan Lawless" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Bayesian Quantification of Evidence Sufficiency} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} library(quantbayes) library(ggplot2) library(dplyr) library(tidyr) library(purrr) # widen default chunk output knitr::opts_chunk$set( fig.width = 8, fig.height = 5, dpi = 120, out.width = "100%" ) #CSS to prevent html_vignette from squashing figures knitr::asis_output(" ") ``` # Overview quantbayes provides a minimal Bayesian transform for quantifying evidence sufficiency from a binary matrix of zero and one entries. The method is context free and produces reproducible scores across workflows and organisations. This vignette shows: 1. Loading the built in dataset 2. Constructing the evidence matrix 3. Running the core transform 4. Producing visual summaries 5. Highlighting variants 6. Using file based input 7. Saving output tables and plots 8. Customising plot themes Note: The variant identifier can be any character string. In the example dataset, the identifiers come from standard Whole Genome Sequencing output produced by Exomiser. --- # 1. Load the example dataset ```{r} data(core_test_data) head(core_test_data) ``` --- # 2. Build the binary evidence matrix ```{r} x <- as.matrix(core_test_data[, -1]) rownames(x) <- core_test_data[[1]] dim(x) ``` --- # 3. Run quantbayes core ```{r} res <- quant_es_core(x) res$global head(res$variants) ``` --- # 4. Evidence sufficiency plots ```{r} plots <- quant_es_plots(res, x) ``` ### Global posterior distribution ```{r} plots$p_global ``` ### Overlay of top candidate densities ```{r} plots$p_overlay ``` ### Evidence matrix ```{r} plots$p_matrix ``` ### Observed evidence proportions ```{r} plots$p_p_hat ``` ### Posterior credible intervals ```{r} plots$p_theta_ci ``` ### Combined panel ```{r} plots$p_combined ``` --- # 5. Highlighting variants of interest ```{r} highlight_demo <- list( list(id = "X-119469998-CT-C_XR", colour = "#ee4035", size = 4), list(id = res$variants$variant_id[1], colour = "#2f4356", size = 4) ) plots2 <- quant_es_plots(res, x, highlight_points = highlight_demo) plots2$p_overlay ``` --- # 6. File based input quantbayes provides a helper for reading flat tables of binary evidence. Values must be 0, 1 or NA. ```{r} # Extract evidence columns ev <- core_test_data[, -1] # Keep only columns containing valid 0, 1, NA entries is_binary_col <- function(x) all(x %in% c(0, 1, NA)) ev <- ev[, vapply(ev, is_binary_col, logical(1))] tmpfile <- tempfile(fileext = ".tsv") write.table( ev, tmpfile, sep = "\t", quote = FALSE, row.names = FALSE ) res_file <- quant_es_from_binary_table(tmpfile, variant_col = NULL) res_file$global ``` --- # 7. Saving output tables ```{r} outdir <- tempdir() if (!dir.exists(outdir)) dir.create(outdir) write.csv(res$variants, file.path(outdir, "variants_results.csv"), row.names = FALSE) write.csv(as.data.frame(res$global), file.path(outdir, "global_summary.csv"), row.names = FALSE) ``` --- # 8. Saving plots ```{r} ggsave(file.path(outdir, "overlay.png"), plots$p_overlay, width = 7, height = 4, dpi = 120) ggsave(file.path(outdir, "matrix.png"), plots$p_matrix, width = 7, height = 6, dpi = 120) ``` --- # 9. Customising plot themes ```{r} plots$p_overlay + theme(legend.position = "none") ``` ```{r} plots$p_overlay + theme_minimal() ``` ```{r} plots$p_overlay + theme_void() ``` ```{r} pal10 <- colorRampPalette(c("black", "grey"))(10) pal20 <- colorRampPalette(c("skyblue", "navy"))(20) plots_custom <- quant_es_plots( res, x, palette10 = pal10, palette20 = pal20 ) plots_custom$p_overlay ``` --- # 10. Flagship overlay plot ```{r} # Two highlighted candidates swiss_red <- "#ee4035" federal_blue <- "#2f4356" # provide the points to highlight highlight_flagship <- list( list(id = core_test_data[[1]][1], colour = swiss_red, size = 4), list(id = "6-32664815-C-G_AR", colour = federal_blue, size = 4) ) plots_flagship <- quant_es_plots( res, x, highlight_points = highlight_flagship ) # add custom ggplot settings flagship_plot <- plots_flagship$p_overlay + ggplot2::guides( fill = ggplot2::guide_legend(title = "highlighted variants"), size = "none" ) + ggplot2::labs( title = "Posterior theta distribution", subtitle = "Top 10 CrI estimates with evidence available\nand two highlighted variants" ) + ggplot2::theme( legend.position = "right", legend.title = ggplot2::element_text(size = 10), legend.text = ggplot2::element_text(size = 9) ) flagship_plot # Save flagship plot outdir <- tempdir() if (!dir.exists(outdir)) dir.create(outdir) ggplot2::ggsave( filename = file.path(outdir, "flagship_plot.pdf"), plot = flagship_plot, width = 8, height = 4 ) ``` --- Here is a **minimal vignette block** that includes the **commands that print the human readable report strings**, fully aligned with your style (British spelling, no em dashes, short, quiet). Paste this directly into the vignette. --- # Core results example ## Clinical genetics example After Whole Genome Sequencing, a candidate selection tool identified possible causal variants. Each was evaluated using a minimal independent evidence set. ```{r} res_df <- as.data.frame(res$variants) global_df <- as.data.frame(res$global) res_df <- res_df[order(res_df$theta_mean, decreasing = TRUE), ] head(res_df) ``` Example output: | variant_id | k | m | theta_mean | theta_lower | theta_upper | percentile | | ------------------- | -- | -- | ---------- | ----------- | ----------- | ---------- | | 2-54234474-G-A_AR | 18 | 24 | 0.7308 | 0.5487 | 0.8793 | 99.875 | | 6-72183475-CG-N_AR | 18 | 24 | 0.7308 | 0.5487 | 0.8793 | 99.875 | | 1-14682421-G-A_AD | 17 | 24 | 0.6923 | 0.5061 | 0.8505 | 99.375 | | 7-751853912-C-T_AR | 17 | 24 | 0.6923 | 0.5061 | 0.8505 | 99.375 | | X-224319469-CT-C_XR | 16 | 24 | 0.6538 | 0.4650 | 0.8203 | 97.000 | | X-414698664-CT-C_XD | 16 | 24 | 0.6538 | 0.4650 | 0.8203 | 97.000 | --- ## Report text (printed automatically) The code below generates the concise human readable report strings. ```{r} # choose the top variant v <- res_df[1, ] g <- global_df variant_line <- sprintf( "Posterior evidence sufficiency: %.3f (credible interval %.3f to %.3f, percentile %.2f)", v$theta_mean, v$theta_lower, v$theta_upper, v$percentile ) global_line <- sprintf( "Global evidence sufficiency: %.2f (credible interval %.2f to %.2f)", g$mean_theta, g$lower_theta, g$upper_theta ) variant_line global_line ``` This produces: ``` Posterior evidence sufficiency: 0.731 (credible interval 0.549 to 0.879, percentile 99.88) Global evidence sufficiency: 0.52 (credible interval 0.38 to 0.65) ``` --- # Summary quantbayes provides a minimal Bayesian transform for quantifying evidence sufficiency from a binary rule matrix. The package includes tools for plotting, highlighting, file based input, exporting results and saving plots for reports and pipelines. quantbayes is designed to be simple, portable and easy to integrate in any workflow.