missPLS

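missPLS provides methods-first tooling for the partial least squares (PLS) workflows described in the article and thesis on NIPALS-PLS regression with incomplete data: simulating datasets, adding missingness, imputing, selecting the number of components, diagnosing real datasets, and running simulation studies.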

Simulate a dataset

library(missPLS)

sim <- simulate_pls_data(n = 30, p = 12, true_ncomp = 2, seed = 1)
str(sim, max.level = 1)
#> List of 6
#>  $ x         : num [1:30, 1:12] 1.817 -0.924 2.16 5.485 0.628 ...
#>   ..- attr(*, "dimnames")=List of 2
#>  $ y         : num [1:30] -13.642 1.074 -0.505 14.646 3.433 ...
#>  $ data      :'data.frame':  30 obs. of  13 variables:
#>  $ true_ncomp: int 2
#>  $ seed      : int 1
#>  $ model     : chr "li2002"

Add missingness and select components

miss <- add_missingness(sim$x, sim$y, mechanism = "MCAR", missing_prop = 10, seed = 2)

select_ncomp(
  x = miss$x_incomplete,
  y = sim$y,
  method = "nipals_standard",
  criterion = "Q2-LOO",
  max_ncomp = 4,
  seed = 3
)
#>   selection_method criterion selected_ncomp criterion_value max_ncomp seed
#> 1  nipals_standard    Q2-LOO              2       0.9280084         4    3
#>   n_imputations status notes
#> 1             1     ok

Impute and re-fit

imp <- impute_pls_data(miss$x_incomplete, method = "knn", seed = 4)

select_ncomp(
  x = imp,
  y = sim$y,
  method = "complete",
  criterion = "Q2-10fold",
  max_ncomp = 4,
  seed = 5,
  folds = 5
)
#>   selection_method criterion selected_ncomp criterion_value max_ncomp seed
#> 1     complete_knn Q2-10fold              2       0.9550102         4    5
#>   n_imputations status                           notes
#> 1             1     ok Mode across 1 imputed datasets.

Real-data diagnostics

diag_bromhexine <- diagnose_real_data("bromhexine")
head(diag_bromhexine$predictor_correlations)
#>   predictor_1 predictor_2 correlation
#> 1          x1          x2   0.9867412
#> 2          x1          x3   0.9859739
#> 3          x2          x3   0.9941415
#> 4          x1          x4   0.9647867
#> 5          x2          x4   0.9822391
#> 6          x3          x4   0.9941991
head(diag_bromhexine$response_correlations)
#>   predictor correlation
#> 1        x7   0.7317195
#> 2        x8   0.8062487
#> 3        x9   0.8601593
#> 4       x10   0.8922337
#> 5       x11   0.9172034
#> 6       x12   0.9334266

Study runners

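The study runners are intended to orchestrate two kinds of runs: quick smoke runs from within the package, and heavier reproduction runs under tools/. The call below is a small smoke-sized run (one dimension setting, two replicates).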

results <- run_simulation_study(
  dimensions = list(c(25, 10)),
  true_ncomp = 2,
  missing_props = 10,
  mechanisms = "MCAR",
  reps = 2,
  seed = 1,
  max_ncomp = 4,
  criteria = c("Q2-LOO", "AIC"),
  incomplete_methods = "nipals_standard",
  imputation_methods = "knn",
  folds = 5
)

summarize_simulation_study(results)
#>                                                                             study
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                   simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                 simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC    simulation
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO              simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO simulation
#>                                                                            dataset
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                   sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                 sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC    sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO              sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO sim_n25_p10
#>                                                                         n  p
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                   25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                 25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC    25 10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO              25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 25 10
#>                                                                        true_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                            2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                          2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC             2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                         2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                       2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO          2
#>                                                                        target_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                              2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                            2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC               2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                           2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                         2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO            2
#>                                                                        mechanism
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                        <NA>
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                      MCAR
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC         MCAR
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                     <NA>
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                   MCAR
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO      MCAR
#>                                                                        missing_prop
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                              0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                           10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC              10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                           0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                        10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO           10
#>                                                                                        method
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                                 Complete
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                              KNNimpute
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC    NIPALS-PLSR (standard)
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                              Complete
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                           KNNimpute
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO NIPALS-PLSR (standard)
#>                                                                        criterion
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                         AIC
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                       AIC
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC          AIC
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                   Q2-LOO
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                 Q2-LOO
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO    Q2-LOO
#>                                                                        n_runs
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                        2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                      2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC         2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                     2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                   2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO      2
#>                                                                        n_success
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                           2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                         2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC            2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                        2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                      2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO         2
#>                                                                        n_matched
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                           0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                         0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC            0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                        2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                      2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO         1
#>                                                                        match_rate
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                          0.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                        0.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC           0.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                       1.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                     1.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO        0.5
#>                                                                        mean_selected_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                                   3.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                                 4.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC                    4.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                                2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                              2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO                 2.5
#>                                                                        median_selected_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                                     3.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                                   4.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC                      4.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                                  2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                                2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO                   2.5
#>                                                                        mean_runtime_sec
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                             0.0030
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                           0.0030
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC              0.0145
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                          0.0585
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                        0.0580
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO           0.0145
#>                                                                        status_summary
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC                               ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC                             ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC                ok
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO                            ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO                          ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO             ok