missPLS provides methods-first tooling for the PLS
workflows described in the article and thesis on incomplete-data
NIPALS-PLS regression.
library(missPLS)
sim <- simulate_pls_data(n = 30, p = 12, true_ncomp = 2, seed = 1)
str(sim, max.level = 1)
#> List of 6
#> $ x : num [1:30, 1:12] 1.817 -0.924 2.16 5.485 0.628 ...
#> ..- attr(*, "dimnames")=List of 2
#> $ y : num [1:30] -13.642 1.074 -0.505 14.646 3.433 ...
#> $ data :'data.frame': 30 obs. of 13 variables:
#> $ true_ncomp: int 2
#> $ seed : int 1
#> $ model : chr "li2002"

miss <- add_missingness(sim$x, sim$y, mechanism = "MCAR", missing_prop = 10, seed = 2)
select_ncomp(
x = miss$x_incomplete,
y = sim$y,
method = "nipals_standard",
criterion = "Q2-LOO",
max_ncomp = 4,
seed = 3
)
#> selection_method criterion selected_ncomp criterion_value max_ncomp seed
#> 1 nipals_standard Q2-LOO 2 0.9280084 4 3
#> n_imputations status notes
#> 1 1 ok

imp <- impute_pls_data(miss$x_incomplete, method = "knn", seed = 4)
select_ncomp(
x = imp,
y = sim$y,
method = "complete",
criterion = "Q2-10fold",
max_ncomp = 4,
seed = 5,
folds = 5
)
#> selection_method criterion selected_ncomp criterion_value max_ncomp seed
#> 1 complete_knn Q2-10fold 2 0.9550102 4 5
#> n_imputations status notes
#> 1 1 ok Mode across 1 imputed datasets.

diag_bromhexine <- diagnose_real_data("bromhexine")
head(diag_bromhexine$predictor_correlations)
#> predictor_1 predictor_2 correlation
#> 1 x1 x2 0.9867412
#> 2 x1 x3 0.9859739
#> 3 x2 x3 0.9941415
#> 4 x1 x4 0.9647867
#> 5 x2 x4 0.9822391
#> 6 x3 x4 0.9941991
head(diag_bromhexine$response_correlations)
#> predictor correlation
#> 1 x7 0.7317195
#> 2 x8 0.8062487
#> 3 x9 0.8601593
#> 4 x10 0.8922337
#> 5 x11 0.9172034
#> 6 x12 0.9334266

The study runners are intended to orchestrate smoke runs in-package
and heavier reproduction runs under `tools/`.
results <- run_simulation_study(
dimensions = list(c(25, 10)),
true_ncomp = 2,
missing_props = 10,
mechanisms = "MCAR",
reps = 2,
seed = 1,
max_ncomp = 4,
criteria = c("Q2-LOO", "AIC"),
incomplete_methods = "nipals_standard",
imputation_methods = "knn",
folds = 5
)
summarize_simulation_study(results)
#> study
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC simulation
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO simulation
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO simulation
#> dataset
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO sim_n25_p10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO sim_n25_p10
#> n p
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 25 10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 25 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 25 10
#> true_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2
#> target_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2
#> mechanism
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC <NA>
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC MCAR
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC MCAR
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO <NA>
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO MCAR
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO MCAR
#> missing_prop
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 10
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 10
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 10
#> method
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC Complete
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC KNNimpute
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC NIPALS-PLSR (standard)
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO Complete
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO KNNimpute
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO NIPALS-PLSR (standard)
#> criterion
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC AIC
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC AIC
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC AIC
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO Q2-LOO
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO Q2-LOO
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO Q2-LOO
#> n_runs
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2
#> n_success
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 2
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2
#> n_matched
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 1
#> match_rate
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 0.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 0.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 0.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 1.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 1.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 0.5
#> mean_selected_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 3.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 4.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 4.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2.5
#> median_selected_ncomp
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 3.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 4.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 4.0
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 2.0
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 2.5
#> mean_runtime_sec
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC 0.0030
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC 0.0030
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC 0.0145
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO 0.0585
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO 0.0580
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO 0.0145
#> status_summary
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.AIC ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.AIC ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).AIC ok
#> simulation.sim_n25_p10.25.10.2.2.<NA>.0.Complete.Q2-LOO ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.KNNimpute.Q2-LOO ok
#> simulation.sim_n25_p10.25.10.2.2.MCAR.10.NIPALS-PLSR (standard).Q2-LOO ok