---
title: "Database Queries"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Database Queries}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunks are shown but not evaluated (eval = FALSE).
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```

```{r setup}
library(unitcm)
```

This vignette demonstrates how to query each database module in the UniTCM
platform.

## Herb Explorer

### Search and filter herbs

```{r herbs-search}
# Text search
herbs <- search_herbs(q = "ginseng")

# Multi-value faceted filters
herbs <- search_herbs(
  flavors = c("sweet", "bitter"),
  properties = "warm",
  toxicity = "non-toxic"
)

# View available filter values
facets <- fetch_herb_facets()
facets$toxicity
facets$meridians
```

### Herb details and compounds

UniTCM IDs are flexible: all `get_*` and `export_*` functions that take a
record ID accept either the prefixed display form shown on the website (e.g.
`"UNITCM_H001"`, `"UNITCM_I00001"`) or the bare numeric ID (e.g. `"1"`, `1L`).
The package normalizes them internally before calling the API.

```{r herbs-detail}
herb <- get_herb("UNITCM_H001")
herb$herb_english_name
herb$efficacy

# Get compounds found in this herb
compounds <- get_herb_compounds("UNITCM_H001", all_pages = TRUE)
head(compounds)
```

### Export data

```{r herbs-export}
export_herbs(q = "ginseng", file = "ginseng_herbs.csv")
export_herb_compounds("UNITCM_H001", file = "ginseng_compounds.csv")
```

## Ingredient Explorer

### Search compounds by properties

```{r compounds-search}
# Drug-like compounds with molecular weight 200-500
compounds <- search_compounds(
  mw_min = 200,
  mw_max = 500,
  lipinski = "pass",
  is_drug = TRUE
)

# Get facet statistics
facets <- fetch_compound_facets()
facets$mw_range
```

### Compound details, ADMET, and targets

```{r compounds-detail}
# Full compound record
compound <- get_compound("UNITCM_I00001")
compound$component_name
compound$xref$pubchem_cid

# ADMET predictions (~90 endpoints)
admet <- get_compound_admet("UNITCM_I00001")
admet$caco2_permeability
admet$hia

# Predicted targets (DrugCLIP deep learning)
targets_dc <- get_compound_targets("UNITCM_I00001", method = "drugclip")

# ChEMBL similarity-based targets
targets_ch <- get_compound_targets("UNITCM_I00001", method = "chembl")

# Both sources combined
targets_all <- get_compound_targets("UNITCM_I00001", method = "both")
table(targets_all$source)
```

## Disease-Formula Atlas

### Search formulas by disease

```{r formulas-search}
# Search by text
formulas <- search_formulas(q = "insomnia")

# Filter by ICD-11 classification
formulas <- search_formulas(
  level1 = "Neoplasms",
  mapping_confidence = c("high", "medium")
)

# Browse the disease classification tree
tree <- fetch_disease_tree()

# Available filter options
list_book_sources()
list_origin_sources()
list_dosage_forms()
```

### Formula details and doses

```{r formulas-detail}
formula <- get_formula(1)
formula$formula_name
formula$efficacy

# Herb composition and dosage
doses <- get_formula_doses(1)
doses
```

## TCM Ontology

### Search and navigate the ontology

```{r ontology-search}
# Full-text search
results <- search_ontology("Qi stagnation")

# Get entity with relations
entity <- get_ontology_entity("TCM_0001")
entity$ancestors
entity$children
entity$external_mappings

# Navigate the hierarchy
children <- get_ontology_children("TCM_0001")
ancestors <- get_ontology_ancestors("TCM_0001")
descendants <- get_ontology_descendants("TCM_0001", max_level = 2)
```

### Ontology tree and statistics

```{r ontology-tree}
# Fetch tree at depth 2
tree <- fetch_ontology_tree(depth = 2)

# Statistics
stats <- fetch_ontology_stats()
stats$total_entities

# Top-level categories
categories <- list_ontology_categories()
```

### Cross-database mapping

```{r ontology-mapping}
# Find TCM entities mapped to a MeSH term
mapped <- search_ontology_mapping("MeSH", "D008516")

# Export ontology
export_ontology("csv", file = "tcm_ontology.csv")
export_ontology("json", depth = 3, file = "tcm_ontology.json")
```

## MIDAS Gene-Disease Analysis

### Gene-to-disease and disease-to-gene

```{r midas-basic}
# Which diseases are associated with these genes?
g2d <- query_gene_diseases(c("TP53", "BRCA1", "EGFR"))
g2d
attr(g2d, "gene_mapping")

# Which genes are associated with breast cancer?
d2g <- query_disease_genes("breast cancer")
d2g
attr(d2g, "matched_diseases")
```

### Disease enrichment analysis

```{r midas-enrichment}
gene_list <- c("TP53", "BRCA1", "EGFR", "VEGFA", "MYC", "KRAS")

enrichment <- query_disease_enrichment(
  gene_list,
  p_value_cutoff = 0.05,
  correction_method = "fdr"
)
enrichment
attr(enrichment, "total_significant")
```

### Utility functions

```{r midas-utils}
# Convert mixed gene identifiers
convert_gene_ids(c("TP53", "7157", "ENSG00000141510"))

# Autocomplete disease names
autocomplete_disease("breast")

# Available data sources
fetch_midas_sources()

# Database statistics
fetch_midas_stats()
```