emery

library(emery)
set.seed(65123)

Emery is a package for estimating accuracy statistics for multiple measurement methods in the absence of a gold standard. It supports sets of methods which are binary, ordinal, or continuous.

The generate_multimethod_data() function can be used to simulate the results from paired measurements of a set of objects.

ex_bin_data <- 
  generate_multimethod_data(
    type = "binary",
    n_method = 3,
    n_obs = 200,
    se = c(0.85, 0.90, 0.95),
    sp = c(0.95, 0.90, 0.85),
    method_names = c("alpha", "beta", "gamma")
  )
ex_bin_data$generated_data[98:103, ]
#>        alpha beta gamma
#> obs098     1    1     1
#> obs099     1    1     1
#> obs100     0    1     0
#> obs101     0    1     0
#> obs102     0    0     0
#> obs103     0    0     0

The resulting list contains the simulated data as well as the parameters used to generate it. If method, observation, or level (ordinal only) names are not provided, default names will be applied

Estimating the accuracy statistics of each method is as simple as calling the estimate_ML() function on the data set. The function expects the data to be a matrix of results with each row representing an observation and each column representing a method. Starting values for the EM algorithm can be provided through the init argument, but these are not required.

ex_bin <- 
  estimate_ML(
    type = "binary",
    data = ex_bin_data$generated_data,
    init = list(prev_1 = 0.8, se_1 = c(0.7, 0.8, 0.75), sp_1 = c(0.85, 0.95, 0.75))
  )
ex_bin
#> $prev_est
#> [1] 0.5151903
#> 
#> $se_est
#>          alpha      beta     gamma
#> [1,] 0.8593233 0.9334435 0.9675317
#> 
#> $sp_est
#>          alpha      beta     gamma
#> [1,] 0.9643469 0.8987253 0.9040096

The result of this function is an S4 object of the class MultiMethodMLEstimate. Basic plots illustrating the estimation process can be created by calling the standard plot() function on the object.

plot(ex_bin)
#> $prev

#> 
#> $se

#> 
#> $sp

#> 
#> $qk

#> 
#> $qk_hist

#> 
#> $se_sp

If the true population parameters are known, as is the case with simulated data, these can be provided to the plot function to enhance the information provided.

plot(ex_bin, params = ex_bin_data$params)
#> $prev

#> 
#> $se

#> 
#> $sp

#> 
#> $qk

#> 
#> $qk_hist

#> 
#> $se_sp

The process for working with ordinal or continuous data is similar to above, though the inputs tend to be more complex.

To simulate ordinal data, we must supply the probability mass functions (pmf) associated with the method’s levels for the “positive” and “negative” observations. It is assumed that “positive” observations correspond to higher levels.

An example pmf for detecting “positive” observations for 3 methods with 5 levels may look something like this.

pmf_pos_ex <- 
  matrix(
    c(
      c(0.05, 0.10, 0.15, 0.30, 0.40),
      c(0.00, 0.05, 0.20, 0.25, 0.50),
      c(0.10, 0.15, 0.20, 0.25, 0.30)
    ),
    nrow = 3, 
    byrow = TRUE
  )

pmf_pos_ex
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.05 0.10 0.15 0.30  0.4
#> [2,] 0.00 0.05 0.20 0.25  0.5
#> [3,] 0.10 0.15 0.20 0.25  0.3

We’ll assume the pmf for negative observations is just the reverse of this for simplicity here.

pmf_neg_ex <- pmf_pos_ex[, 5:1]
ex_ord_data <- 
  generate_multimethod_data(
    type = "ordinal",
    n_method = 3,
    n_obs = 200,
    pmf_pos = pmf_pos_ex,
    pmf_neg = pmf_neg_ex,
    method_names = c("alice", "bob", "carrie"),
    level_names = c("strongly dislike", "dislike", "neutral", "like", "strongly like")
  )
ex_ord_data$generated_data[98:103, ]
#>        alice bob carrie
#> obs098     5   4      5
#> obs099     5   5      4
#> obs100     5   3      4
#> obs101     1   4      1
#> obs102     1   4      2
#> obs103     2   1      3
ex_ord <- 
  estimate_ML(
    type = "ordinal",
    data = ex_ord_data$generated_data,
    level_names = ex_ord_data$params$level_names
  )
ex_ord
#> $prev_est
#> [1] 0.5556177
#> 
#> $A_i_est
#>     alice       bob    carrie 
#> 0.8083350 0.8802505 0.7311353 
#> 
#> $phi_1ij_est
#>                       alice        bob    carrie
#> strongly dislike 0.14001062 0.04838996 0.1002693
#> dislike          0.11604592 0.08533310 0.1611551
#> neutral          0.07061599 0.16554176 0.1535458
#> like             0.26837274 0.26996982 0.2612555
#> strongly like    0.40495473 0.43076536 0.3237743
#> 
#> $phi_0ij_est
#>                         alice          bob     carrie
#> strongly dislike 3.875213e-01 4.458181e-01 0.29093999
#> dislike          3.162206e-01 2.983634e-01 0.27107143
#> neutral          2.042487e-01 2.093289e-01 0.25808238
#> like             9.200943e-02 1.419722e-19 0.11215985
#> strongly like    4.760496e-26 4.648961e-02 0.06774635
plot(ex_ord, params = ex_ord_data$params)
#> $ROC

#> 
#> $q_k1

#> 
#> $q_k0

#> 
#> $q_k1_hist

#> 
#> $phi_d

Unlike binary and ordinal methods which require 3 or more methods to create estimates, continuous method estimates can be produced with data from just 2.

ex_con_data <- 
  generate_multimethod_data(
    type = "continuous",
    n_method = 3,
    n_obs = 200,
    method_names = c("phi", "kappa", "sigma")
  )
ex_con_data$generated_data[98:103, ]
#>              phi     kappa     sigma
#> obs098 12.380241 10.561615 12.149463
#> obs099 13.651226 12.954333 11.037958
#> obs100 12.083743 11.316475  9.652041
#> obs101  8.830481 10.704309  8.865215
#> obs102  9.420454  8.686352  9.200257
#> obs103 10.946638 10.826431  9.480511

Estimating the accuracy parameters is the same as above.

ex_con <- 
  estimate_ML(
    type = "continuous",
    data = ex_con_data$generated_data
  )
ex_con
#> $prev_est
#> [1] 0.5273173
#> 
#> $mu_i1_est
#>      phi    kappa    sigma 
#> 12.06691 11.87176 11.67503 
#> 
#> $sigma_i1_est
#>                phi        kappa       sigma
#> phi    1.009875689 -0.002860623 -0.02656859
#> kappa -0.002860623  1.094208542  0.19605713
#> sigma -0.026568593  0.196057127  1.05248522
#> 
#> $mu_i0_est
#>       phi     kappa     sigma 
#>  9.818314  9.972865 10.026923 
#> 
#> $sigma_i0_est
#>                 phi         kappa       sigma
#> phi    0.7918941636 -0.0001959635 -0.07475076
#> kappa -0.0001959635  0.8502503472 -0.11328714
#> sigma -0.0747507641 -0.1132871374  0.97445652
#> 
#> $eta_j_est
#>      phi    kappa    sigma 
#> 1.675181 1.361760 1.157620 
#> 
#> $A_j_est
#>       phi     kappa     sigma 
#> 0.9530507 0.9133631 0.8764904
plot(ex_con, params = ex_con_data$params)
#> $ROC

#> 
#> $z_k1

#> 
#> $z_k0

#> 
#> $z_k1_hist

Confidence intervals for all accuracy statistics can be estimated by bootstrap. The boot_ML() function is a handy tool for generating bootstrapped estimates.

ex_boot_bin <- boot_ML(
  type = "binary",
  data = ex_bin_data$generated_data,
  n_boot = 20
)

# print the estimates of sensitivity from the complete data set
ex_boot_bin$v_0@results$se_est
#>          alpha      beta     gamma
#> [1,] 0.8593234 0.9334436 0.9675318

# print the first 3 bootstrap estimates of sensitivity
ex_boot_bin$v_star[[1]]$se_est
#>        alpha      beta     gamma
#> [1,] 0.84491 0.9208472 0.9649755
ex_boot_bin$v_star[[2]]$se_est
#>          alpha      beta     gamma
#> [1,] 0.8078001 0.9574307 0.9812427
ex_boot_bin$v_star[[3]]$se_est
#>          alpha      beta     gamma
#> [1,] 0.8273965 0.9239061 0.9248651