---
title: "Exploring Sobol indices and randomness with Sobol4R"
shorttitle: "Exploring Sobol indices and randomness with Sobol4R"
author:
- name: "Frédéric Bertrand"
  affiliation:
  - Cedric, Cnam, Paris
  email: frederic.bertrand@lecnam.net
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
vignette: >
  %\VignetteIndexEntry{Exploring Sobol indices and randomness with Sobol4R}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Global chunk defaults. Chunks are not evaluated by default (eval = FALSE);
# the computational chunks below opt in through `eval=LOCAL`.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "figures/sobol-exploring-",
  fig.width = 6,
  fig.height = 4.5,
  dpi = 150,
  message = FALSE,
  warning = FALSE,
  eval = FALSE
)
# LOCAL is TRUE only when the environment variable LOCAL is exactly "TRUE".
LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE")
library(sensitivity)
library(Sobol4R)
set.seed(4669)
```

# Context and non-random case

Test case: the non-monotonic Sobol g function. The method of Sobol requires two samples of the inputs. In the reference case there are eight variables, all following the uniform distribution on [0,1].

```{r det-design, cache=TRUE, eval=LOCAL}
# Two n x p samples of uniform(0, 1) inputs, one per sample required by the
# Sobol method.
n <- 50000
p <- 8
X1_1 <- data.frame(matrix(runif(p * n), nrow = n))
X2_1 <- data.frame(matrix(runif(p * n), nrow = n))
```

```{r det-g-run, cache=TRUE, eval=LOCAL}
# Reset the seed before building the design (presumably so that any
# resampling driven by `nboot` is reproducible -- confirm in Sobol4R).
set.seed(4669)
gensol1 <- sobol4r_design(
  X1 = X1_1,
  X2 = X2_1,
  order = 2,
  nboot = 100
)
# Evaluate the g function on the design points, then feed the responses back.
Y1 <- sobol_g_function(gensol1$X)
x1 <- sensitivity::tell(gensol1, Y1)
```

```{r det-g-plot, cache=TRUE, eval=LOCAL}
print(x1)
Sobol4R::autoplot(x1, ncol = 1)
```

```{r det-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
# Cleanup is a pure side effect: it must not be cached, otherwise knitr skips
# it whenever the cache is valid.
rm(gensol1, X1_1, X2_1)
```

```{r det-example, cache=TRUE, eval=LOCAL}
# Same analysis through the packaged example helper.
ex1_results <- sobol_example_g_deterministic()
print(ex1_results)
```

```{r det-example-plot, cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(ex1_results, ncol = 1)
```

```{r det-example-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
rm(ex1_results)
```

# Sobol and randomness I: random effect on output variable

## Generate data

```{r r1-design, cache=TRUE, eval=LOCAL}
# Two samples of the two uniform(0, 1) inputs C1 and C2.
n <- 50000
X1_r1 <- data.frame(
  C1 = runif(n),
  C2 = runif(n)
)
X2_r1 <- data.frame(
  C1 = runif(n),
  C2 = runif(n)
)
```

## Three settings, two input variables

The deterministic model is `sobol_g2_function`. The noisy version with Gaussian noise N(0,1) is `sobol_g2_additive_noise`. The quantity of interest based on the mean over replications is `sobol_g2_qoi_mean`.

```{r r1-sobol-design, cache=TRUE, eval=LOCAL}
# Reset the seed before building the design (presumably so that any
# resampling driven by `nboot` is reproducible -- confirm in Sobol4R).
set.seed(4669)
gensol2 <- sobol4r_design(
  X1 = X1_r1,
  X2 = X2_r1,
  order = 2,
  nboot = 100
)
```

```{r r1-Y, cache=TRUE, eval=LOCAL}
# Y2: deterministic model; Y3: noisy model; Y4: mean over nrep replications.
Y2 <- sobol_g2_function(gensol2$X)
Y3 <- sobol_g2_additive_noise(gensol2$X)
Y4 <- sobol_g2_qoi_mean(gensol2$X, nrep = 1000)
```

```{r r1-results, cache=TRUE, eval=LOCAL}
# The same design object is reused for the three responses.
x2 <- sensitivity::tell(gensol2, Y2)
x3 <- sensitivity::tell(gensol2, Y3)
x4 <- sensitivity::tell(gensol2, Y4)
```

```{r r1-print, cache=TRUE, eval=LOCAL}
print(x2)
print(x3)
print(x4)
```

```{r r1-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(x2)
Sobol4R::autoplot(x3)
Sobol4R::autoplot(x4)
```

```{r r1-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
# Cleanup is a pure side effect: it must not be cached, otherwise knitr skips
# it whenever the cache is valid.
rm(gensol2)
```

```{r r1-example, cache=TRUE, eval=LOCAL}
# Same analysis through the packaged example helper.
ex2_results <- sobol_example_random_output()
ex2_results
```

```{r r1-example-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(ex2_results$x_det)
Sobol4R::autoplot(ex2_results$x_noise)
Sobol4R::autoplot(ex2_results$x_qoi)
```

```{r r1-example-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
rm(ex2_results)
```

# Sobol and randomness II: large random effect depending on an input variable

We keep the previously generated values for C1 and C2 and add a third variable C3 distributed as `runif(n, min = 1, max = 100)`. The third variable controls the mean of the Gaussian noise.

```{r r2-design, cache=TRUE, eval=LOCAL}
# C1 and C2 are reused from the first design, so the sample size is taken
# from it instead of being hard-coded a second time.
n <- nrow(X1_r1)
X1_r2 <- data.frame(
  C1 = X1_r1$C1,
  C2 = X1_r1$C2,
  C3 = runif(n, min = 1, max = 100)
)
X2_r2 <- data.frame(
  C1 = X2_r1$C1,
  C2 = X2_r1$C2,
  C3 = runif(n, min = 1, max = 100)
)
```

```{r r2-head, cache=TRUE, eval=LOCAL}
# The C1 and C2 columns of both data frames should coincide.
head(X1_r1)
head(X1_r2)
```

```{r r2-sobol-design, cache=TRUE, eval=LOCAL}
# Reset the seed before building the design (presumably so that any
# resampling driven by `nboot` is reproducible -- confirm in Sobol4R).
set.seed(4669)
gensol3 <- sobol4r_design(
  X1 = X1_r2,
  X2 = X2_r2,
  order = 2,
  nboot = 100
)
```

```{r r2-Y, cache=TRUE, eval=LOCAL}
# Y5: one noisy evaluation per design point; Y6: quantity of interest
# computed with nrep = 1000 replications per design point.
Y5 <- sobol_g2_with_covariate_noise(gensol3$X)
Y6 <- sobol_g2_qoi_covariate_mean(gensol3$X, nrep = 1000)
```

```{r r2-results, cache=TRUE, eval=LOCAL}
x5 <- sensitivity::tell(gensol3, Y5)
x6 <- sensitivity::tell(gensol3, Y6)
```

```{r r2-print, cache=TRUE, eval=LOCAL}
print(x5)
print(x6)
```

```{r r2-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(x5)
Sobol4R::autoplot(x6)
```

```{r r2-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
# Cleanup is a pure side effect: it must not be cached, otherwise knitr skips
# it whenever the cache is valid.
rm(gensol3, X1_r2, X2_r2)
```

```{r r2-example, cache=TRUE, eval=LOCAL}
# Same analysis through the packaged example helper.
ex3_results <- sobol_example_covariate_large()
ex3_results
```

```{r r2-example-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(ex3_results$x_single)
Sobol4R::autoplot(ex3_results$x_qoi)
```

```{r r2-example-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
rm(ex3_results)
```

# Sobol and randomness III: slight random effect depending on an input variable

We now take a third input C3 distributed as `runif(n, min = 1, max = 1.5)`, which induces a much smaller range for the mean of the noise.

```{r r3-design, cache=TRUE, eval=LOCAL}
# C1 and C2 are reused from the first design, so the sample size is taken
# from it instead of being hard-coded a second time.
n <- nrow(X1_r1)
X1_r3 <- data.frame(
  C1 = X1_r1$C1,
  C2 = X1_r1$C2,
  C3 = runif(n, min = 1, max = 1.5)
)
X2_r3 <- data.frame(
  C1 = X2_r1$C1,
  C2 = X2_r1$C2,
  C3 = runif(n, min = 1, max = 1.5)
)
```

```{r r3-sobol-design, cache=TRUE, eval=LOCAL}
# Reset the seed before building the design (presumably so that any
# resampling driven by `nboot` is reproducible -- confirm in Sobol4R).
set.seed(4669)
gensol4 <- sobol4r_design(
  X1 = X1_r3,
  X2 = X2_r3,
  order = 2,
  nboot = 100
)
```

```{r r3-Y, cache=TRUE, eval=LOCAL}
# Y7: one noisy evaluation per design point; Y8: quantity of interest
# computed with nrep = 1000 replications per design point.
Y7 <- sobol_g2_with_covariate_noise(gensol4$X)
Y8 <- sobol_g2_qoi_covariate_mean(gensol4$X, nrep = 1000)
```

```{r r3-results, cache=TRUE, eval=LOCAL}
x7 <- sensitivity::tell(gensol4, Y7)
x8 <- sensitivity::tell(gensol4, Y8)
```

```{r r3-print, cache=TRUE, eval=LOCAL}
print(x7)
print(x8)
```

```{r r3-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(x7)
Sobol4R::autoplot(x8)
```

```{r r3-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
# Cleanup is a pure side effect: it must not be cached, otherwise knitr skips
# it whenever the cache is valid.
rm(gensol4, X1_r3, X2_r3)
```

```{r r3-example, cache=TRUE, eval=LOCAL}
# Same analysis through the packaged example helper.
ex4_results <- sobol_example_covariate_small()
ex4_results
```

```{r r3-example-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(ex4_results$x_single)
Sobol4R::autoplot(ex4_results$x_qoi)
```

```{r r3-example-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
rm(ex4_results)
```

# Sobol and randomness IV: random variables with fixed distribution parameters

We now turn to the process model. The uncertain inputs are the distributional parameters of the individual unit model. The quantity of interest is the time needed to reach a given number of successes.

```{r process-design, cache=TRUE, eval=LOCAL}
n <- 100

# Draw `n` independent parameter vectors, one per row.
#
# Each row holds five values: three reciprocals of uniform draws followed by
# two plain uniform draws. The result is a data frame with `n` rows and five
# columns, auto-named X1..X5 by data.frame().
#
# vapply() fixes the shape of each draw (numeric(5)), so the result stays a
# 5-column data frame even when n = 0; the order of the random draws is the
# same as with replicate().
draw_params <- function(n) {
  one_draw <- function(i) {
    c(
      1 / runif(1, min = 20, max = 100),
      1 / runif(1, min = 24, max = 2000),
      1 / runif(1, min = 24, max = 120),
      runif(1, min = 0.05, max = 0.3),
      runif(1, min = 0.3, max = 0.7)
    )
  }
  draws <- vapply(seq_len(n), one_draw, numeric(5))
  data.frame(t(draws))
}

X1_process <- draw_params(n)
X2_process <- draw_params(n)
```

```{r process-sobol-design, cache=TRUE, eval=LOCAL}
# Reset the seed before building the design (presumably so that any
# resampling driven by `nboot` is reproducible -- confirm in Sobol4R).
set.seed(4669)
gensolp1 <- sobol4r_design(
  X1 = X1_process,
  X2 = X2_process,
  order = 2,
  nboot = 10
)
```

```{r process-Y, cache=TRUE, eval=LOCAL}
# MM is passed as `M` to both process functions (presumably the target
# number of successes -- confirm in Sobol4R).
MM <- 50
Yp1 <- process_fun_row_wise(gensolp1$X, M = MM)
Yp2 <- process_fun_mean_to_M(gensolp1$X, M = MM, nrep = 10)
```

```{r process-results, cache=TRUE, eval=LOCAL}
xp1 <- sensitivity::tell(gensolp1, Yp1)
xp2 <- sensitivity::tell(gensolp1, Yp2)
```

```{r process-print, cache=TRUE, eval=LOCAL}
print(xp1)
print(xp2)
```

```{r process-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(xp1)
Sobol4R::autoplot(xp2)
```

```{r process-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
# Cleanup is a pure side effect: it must not be cached, otherwise knitr skips
# it whenever the cache is valid.
rm(
  X1_r1, X2_r1,
  X1_process, X2_process,
  gensolp1
)
```

```{r process-example, cache=TRUE, eval=LOCAL}
# Same analysis through the packaged example helper.
ex5_results <- sobol_example_process(order = 2)
ex5_results
```

```{r process-example-plot, fig.keep='all', cache=TRUE, eval=LOCAL}
Sobol4R::autoplot(ex5_results$xp_single)
Sobol4R::autoplot(ex5_results$xp_qoi)
```

```{r process-example-cleanup, echo=FALSE, cache=FALSE, eval=LOCAL}
rm(ex5_results)
```