library(iCARH)
## Loading required package: rstan
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## Loading required package: MASS
## Loading required package: glue
library(abind)

Tp=4L # timepoints
N=10L # number of samples
J=14L # number of metabolites
K=2L # number of bacteria species
P=8L  # number of pathways

set.seed(12473)

For real data Build pathway matrices using iCARH.getPathwaysMat. Elements in KEGG id list may contain multiple KEGG ids per metabolite. If KEGG id unknown use : “Unk[number]”.

keggid = list("Unk1", "C03299","Unk2","Unk3",
 c("C08363", "C00712")  # allowing multiple ids per metabolite
 )
 pathways = iCARH.getPathwaysMat(keggid, "rno")

To simulate data use iCARH.simulate. Use path.names to manually choose pathways or simply specify the expected proportion of metabolites per pathway via path.probs.

# Example of manually picked pathways
# path.names = c("path:map00564","path:map00590","path:map00061","path:map00591",
#               "path:map00592","path:map00600","path:map01040","path:map00563")
# Specify expected proportion of metabolites per pathway
path.probs = 0.8
data.sim = iCARH.simulate(Tp, N, J, P, K, path.probs = 0.8, Zgroupeff=c(0,4),
                          beta.val=c(1,-1,0.5, -0.5))

XX = data.sim$XX
Y = data.sim$Y
Z = data.sim$Z
pathways = data.sim$pathways
XX[2,2,2] = NA #missing value example

Check inaccuracies between covariance and design matrices

pathways.bin = lapply(pathways, function(x) { y=1/(x+1); diag(y)=0; y})
adjmat = rowSums(abind::abind(pathways.bin, along = 3), dims=2)
cor.thresh = 0.7
# check number of metabolites in same pathway but not correlated
for(i in 1:Tp) print(sum(abs(cor(XX[i,,])[which(adjmat>0)])<cor.thresh))
## [1] 50
## [1] NA
## [1] 44
## [1] 66

Run iCARH model.

rstan::rstan_options(auto_write = TRUE)
options(mc.cores = 2)
# For testing, does not converge
fit.sim = iCARH.model(XX, Y, Z, groups=rep(c(0,1), each=5), pathways = pathways, control = list(max_treedepth=8),
                     iter = 4, chains = 1)
## [1] "Stan warning: simpleWarning in system2(file.path(R.home(component = \"bin\"), \"R\"), args = paste(\"CMD config\", : running command ''/Library/Frameworks/R.framework/Resources/bin/R' CMD config CXX14 2>/dev/null' had status 69\n"
# Not run
# fit.sim = iCARH.model(XX, Y, Z, pathways, control = list(adapt_delta = 0.99, max_treedepth=10),
#                      iter = 2000, chains = 2, pars=c("YY","Xmis","Ymis"), include=F)

Check convergence

if(!is.null(fit.sim$icarh)){
  rhats = iCARH.checkRhats(fit.sim)
}

Processing results. Bacteria effects.

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotBeta(fit.sim)
  gplot
}

Treatments effects

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotTreatmentEffect(fit.sim)
  gplot
}

Pathway analysis

if(!is.null(fit.sim$icarh)){
  gplot = iCARH.plotPathwayPerturbation(fit.sim, path.names=names(data.sim$pathways))
  gplot
}

Normality assumptions

if(!is.null(fit.sim$icarh)){
  par(mfrow=c(1,2))
  iCARH.checkNormality(fit.sim)
}

WAIC

if(!is.null(fit.sim$icarh)){
  waic = iCARH.waic(fit.sim)
  waic
}

Posterior predictive checks MAD : mean absolute deviation between covariance matrices

if(!is.null(fit.sim$icarh)){
  mad = iCARH.mad(fit.sim)
  quantile(mad)
}

Get missing data

if(!is.null(fit.sim$icarh)){
  imp = iCARH.getDataImputation(fit.sim)
}