Introduction to the multiocc package

library(interp)
library(MCMCpack)
#> Loading required package: coda
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:interp':
#> 
#>     area
#> ##
#> ## Markov Chain Monte Carlo Package (MCMCpack)
#> ## Copyright (C) 2003-2023 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
#> ##
#> ## Support provided by the U.S. National Science Foundation
#> ## (Grants SES-0350646 and SES-0350613)
#> ##
library(tmvtnorm)
#> Loading required package: mvtnorm
#> Loading required package: Matrix
#> Loading required package: stats4
#> Loading required package: gmm
#> Loading required package: sandwich
library(truncnorm)
library(multiocc)
library(MASS)
library(corrplot)
#> corrplot 0.92 loaded
library(fields)
#> Loading required package: spam
#> Spam version 2.9-1 (2022-08-07) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction 
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#> 
#> Attaching package: 'spam'
#> The following object is masked from 'package:stats4':
#> 
#>     mle
#> The following object is masked from 'package:Matrix':
#> 
#>     det
#> The following objects are masked from 'package:mvtnorm':
#> 
#>     rmvnorm, rmvt
#> The following objects are masked from 'package:base':
#> 
#>     backsolve, forwardsolve
#> Loading required package: viridis
#> Loading required package: viridisLite
#> 
#> Try help(fields) to get started.
data(detection)
data(occupancy)
data(coords)
DataNames <- list("species"=colnames(detection)[4:9],
             "detection"=c("duration"),"occupancy"=c("forest","elev"))
model.input <- multioccbuild(detection, occupancy, coords, DataNames, threshold = 15000)
#> Warning: Rows in detection with missing covariates have been removed for purposes of fitting the model, but the site/season combination is retained in occupancy and therefore predictions will be outputted.

Perform some exploratory data analysis

par(mfrow=c(1,3))
hist(occupancy$forest, main="", xlab="Forest")
hist(occupancy$elev, main="", xlab="Elevation")
hist(detection$duration, main="", xlab="Duration")


par(mfrow=c(3,2), mar=c(3,3,3,1))
quilt.plot(coords[,2:3], occupancy$forest[1:267], main="Forest Cover", zlim=c(-1.5,3))
fit <- Tps(coords[,2:3], occupancy$forest[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Forest Cover (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], occupancy$elev[1:267], main="Elevation", zlim=c(-1.5,3.5))
fit <- Tps(coords[,2:3], occupancy$elev[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Elevation (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], detection$duration[1:267], main="Duration", zlim=c(-2.5,3))
fit <- Tps(coords[,2:3], detection$duration[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Duration (Survey 1)", zlim=c(-2.5,2.5))

A short run for demonstration purposes

## Shorter run for demonstration purposes.
## library(tmvtnorm)
mcmc.out <- GibbsSampler(M.iter=10, M.burn=1, M.thin=1, model.input, q=10, sv=FALSE)
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |======================================================================| 100%

A longer run (not executed) for scientific results

mcmc.out <- GibbsSampler(M.iter=50000, M.burn=20000, M.thin=1, model.input, q=10, sv=FALSE)

Can summarize output

summary(mcmc.out$samples$alpha)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                        Mean      SD Naive SE Time-series SE
#> Great.tit Int       0.69835 0.09921 0.033070       0.075969
#> Great.tit forest   -0.12758 0.06248 0.020826       0.056477
#> Great.tit elev     -0.16073 0.06919 0.023065       0.060338
#> Blue.tit Int        0.46528 0.05866 0.019553       0.038294
#> Blue.tit forest    -0.07889 0.01758 0.005860       0.005860
#> Blue.tit elev      -0.18566 0.04188 0.013960       0.024737
#> Coal.tit Int        0.82393 0.12553 0.041845       0.091271
#> Coal.tit forest    -0.05244 0.01884 0.006280       0.006280
#> Coal.tit elev      -0.12141 0.02623 0.008744       0.008744
#> Crested.tit Int     0.56154 0.11938 0.039795       0.074595
#> Crested.tit forest -0.02662 0.03938 0.013125       0.025112
#> Crested.tit elev   -0.09807 0.02102 0.007007       0.007007
#> Marsh.tit Int       0.34319 0.10527 0.035089       0.075616
#> Marsh.tit forest   -0.05932 0.05014 0.016714       0.025374
#> Marsh.tit elev     -0.17196 0.04795 0.015985       0.019756
#> Willow.tit Int      0.11058 0.08867 0.029556       0.064744
#> Willow.tit forest   0.05672 0.02220 0.007399       0.007399
#> Willow.tit elev     0.06251 0.02000 0.006667       0.004618
#> 
#> 2. Quantiles for each variable:
#> 
#>                        2.5%      25%      50%       75%     97.5%
#> Great.tit Int       0.52827  0.63278  0.74617  0.772258  0.791023
#> Great.tit forest   -0.21613 -0.17854 -0.11179 -0.072962 -0.055661
#> Great.tit elev     -0.24305 -0.22251 -0.16116 -0.099487 -0.062632
#> Blue.tit Int        0.35770  0.43705  0.47382  0.505457  0.521726
#> Blue.tit forest    -0.10507 -0.08619 -0.07589 -0.063430 -0.055951
#> Blue.tit elev      -0.24250 -0.22122 -0.17739 -0.162014 -0.129102
#> Coal.tit Int        0.62527  0.71490  0.84923  0.910489  0.972365
#> Coal.tit forest    -0.07962 -0.06893 -0.04970 -0.037640 -0.031232
#> Coal.tit elev      -0.16518 -0.13060 -0.12589 -0.099580 -0.091850
#> Crested.tit Int     0.34102  0.50222  0.60218  0.659482  0.667231
#> Crested.tit forest -0.06654 -0.06411 -0.03645  0.009276  0.026406
#> Crested.tit elev   -0.12968 -0.10909 -0.09674 -0.084510 -0.072546
#> Marsh.tit Int       0.17105  0.28989  0.37106  0.429146  0.466370
#> Marsh.tit forest   -0.13860 -0.08710 -0.04292 -0.018821 -0.004714
#> Marsh.tit elev     -0.23317 -0.22619 -0.16623 -0.125220 -0.119335
#> Willow.tit Int     -0.04322  0.05387  0.14107  0.183697  0.190370
#> Willow.tit forest   0.01273  0.05508  0.05952  0.072292  0.075793
#> Willow.tit elev     0.03298  0.04822  0.06152  0.073286  0.089333
summary(mcmc.out$samples$rho)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                   Mean      SD Naive SE Time-series SE
#> Great.tit rho   0.7755 0.10238  0.03413        0.08567
#> Blue.tit rho    0.7946 0.09685  0.03228        0.07046
#> Coal.tit rho    0.7972 0.11133  0.03711        0.03711
#> Crested.tit rho 0.8715 0.12801  0.04267        0.07917
#> Marsh.tit rho   0.9399 0.05427  0.01809        0.01809
#> Willow.tit rho  0.9178 0.11220  0.03740        0.03740
#> 
#> 2. Quantiles for each variable:
#> 
#>                   2.5%    25%    50%    75%  97.5%
#> Great.tit rho   0.6647 0.6978 0.7556 0.8430 0.9306
#> Blue.tit rho    0.6514 0.7243 0.8160 0.8620 0.9236
#> Coal.tit rho    0.5778 0.8025 0.8090 0.8680 0.9111
#> Crested.tit rho 0.6429 0.8052 0.9417 0.9610 0.9951
#> Marsh.tit rho   0.8361 0.9154 0.9573 0.9765 0.9940
#> Willow.tit rho  0.6833 0.9456 0.9657 0.9797 0.9822

Visualize correlation matrix

par(mfrow=c(1,1), mar=c(3,3,3,1))
sigout <- mcmc.out$samples$sig
Sig <- matrix(colMeans(sigout),6,6)
SpeciesCor <- cov2cor(Sig)
rownames(SpeciesCor) <- DataNames$species
colnames(SpeciesCor) <- DataNames$species
corrplot::corrplot(SpeciesCor)

Make predictions from fitted model

y.agg1 <-  aggregate(model.input$y[,1], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot1 <- 1*(y.agg1$x>0)

y.agg2 <- aggregate(model.input$y[,2], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot2 <- 1*(y.agg2$x>0)

y.agg3 <- aggregate(model.input$y[,3], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot3 <- 1*(y.agg3$x>0)

y.agg4 <- aggregate(model.input$y[,4], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot4 <- 1*(y.agg4$x>0)

y.agg5 <- aggregate(model.input$y[,5], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot5 <- 1*(y.agg5$x>0)

y.agg6 <- aggregate(model.input$y[,6], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot6 <- 1*(y.agg6$x>0)

for (yr in c(1,4,7,10)){
  print(yr)

  range <- which(model.input$occupancy.info$season == yr)

  psiout <- mcmc.out$samples$psi
  #pout <- mcmc.out$p
  dim(psiout)

  psi1 <- apply(psiout[,0*2670+range],2,mean)
  psi2 <- apply(psiout[,1*2670+range],2,mean)
  psi3 <- apply(psiout[,2*2670+range],2,mean)
  psi4 <- apply(psiout[,3*2670+range],2,mean)
  psi5 <- apply(psiout[,4*2670+range],2,mean)
  psi6 <- apply(psiout[,5*2670+range],2,mean)

  par(mfrow=c(3,2), mar=c(1,3,3,1))
  fit <- Tps(coords[1:267,2:3], psi1)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Great Tit", zlim=c(-0.01,1.01))
  mtext(paste("Year",yr), side=3, line=-2, outer=TRUE)

  y.plot1.in <- y.plot1[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot1.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi2)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Blue Tit", zlim=c(-0.01,1.01))

  y.plot2.in <- y.plot2[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot2.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi3)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Coal Tit", zlim=c(-0.01,1.01))

  y.plot3.in <- y.plot3[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot3.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi4)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Crested Tit", zlim=c(-0.01,1.01))

  y.plot4.in <- y.plot4[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot4.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi5)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Marsh Tit", zlim=c(-0.01,1.01))

  y.plot5.in <- y.plot5[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot5.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi6)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Willow Tit", zlim=c(-0.01,1.01))

  y.plot6.in <- y.plot6[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot6.in==1),2:3])
}
#> [1] 1

#> [1] 4

#> [1] 7

#> [1] 10