## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for this document: prefix printed output with "#>" and
# collapse each chunk's source and output into a single block.
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## -----------------------------------------------------------------------------
library(featdelta)

# Example data: an integer row identifier (car_id) followed by a subset of
# the mtcars columns. The mtcars row names carry over from the data-frame
# component.
raw_cars <- data.frame(
  car_id = seq_len(nrow(mtcars)),
  mtcars[c("mpg", "cyl", "disp", "hp", "wt", "am")]
)

head(raw_cars)

## -----------------------------------------------------------------------------
# Three single-expression feature definitions that refer to the am, hp, cyl
# and wt columns.
# In mtcars, am is coded 0 = automatic and 1 = manual, so am == 1 must be
# labelled "manual".
defs_basic <- fd_define(
  transmission = ifelse(am == 1, "manual", "automatic"),
  hp_per_cyl = hp / cyl,
  wt_per_hp = wt / hp
)

# Print the definition object.
defs_basic

## -----------------------------------------------------------------------------
# Run defs_basic against raw_cars, passing car_id as the key column, and
# preview the first rows of the result.
features_basic <- fd_compute(raw_cars, defs_basic, key = "car_id")

head(features_basic)

## -----------------------------------------------------------------------------
# Definitions that build on one another: strong_engine refers to hp_per_cyl,
# and engine_label refers to strong_engine.
defs_ordered <- fd_define(
  hp_per_cyl = hp / cyl,
  strong_engine = hp_per_cyl > 30,
  engine_label = ifelse(strong_engine, "strong", "regular")
)

# Run the chained definitions against raw_cars and preview the result.
features_ordered <- fd_compute(raw_cars, defs_ordered, key = "car_id")

head(features_ordered)

## -----------------------------------------------------------------------------
# Feature logic stored as expression objects ahead of time, then handed to
# fd_define() through variables instead of being written inline.
log_hp_expr <- expression(log(hp))
heavy_car_expr <- expression(wt > 3.5)

defs_programmatic <- fd_define(
  log_hp = log_hp_expr,
  heavy_car = heavy_car_expr
)

# Run the pre-built expressions against raw_cars and preview the result.
features_programmatic <- fd_compute(
  raw_cars,
  defs_programmatic,
  key = "car_id"
)

head(features_programmatic)

## -----------------------------------------------------------------------------
# A single fd_block() definition whose body is one data.frame() call with
# three ratio columns (hp_per_cyl, disp_per_cyl, wt_per_hp).
defs_block <- fd_define(
  engine_ratios = fd_block({
    data.frame(
      hp_per_cyl = hp / cyl,
      disp_per_cyl = disp / cyl,
      wt_per_hp = wt / hp
    )
  })
)

# Run the block definition against raw_cars and preview the result.
features_block <- fd_compute(raw_cars, defs_block, key = "car_id")

head(features_block)

## -----------------------------------------------------------------------------
# A multi-statement fd_block(): the body first assigns intermediate values
# (two per-cylinder ratios, their average, and a flag for averages above the
# median of ratio_average), then ends with a data.frame() call that gathers
# them into four columns.
# NOTE(review): presumably the data frame returned by the last expression
# supplies the feature columns — confirm against the fd_block documentation.
defs_script_block <- fd_define(
  engine_script = fd_block({
    hp_per_cyl <- hp / cyl
    disp_per_cyl <- disp / cyl

    ratio_average <- (hp_per_cyl + disp_per_cyl) / 2
    high_ratio <- ratio_average > stats::median(ratio_average, na.rm = TRUE)

    data.frame(
      hp_per_cyl = hp_per_cyl,
      disp_per_cyl = disp_per_cyl,
      engine_ratio_average = ratio_average,
      high_engine_ratio = high_ratio
    )
  })
)

# Run the script-style block against raw_cars with car_id as the key column.
features_script_block <- fd_compute(
  data = raw_cars,
  defs = defs_script_block,
  key = "car_id"
)

# Preview the first rows of the result.
head(features_script_block)

## -----------------------------------------------------------------------------
# Build per-cylinder engine ratios.
#
# data: a data frame with hp, disp and cyl columns.
# Returns a data frame with hp_per_cyl, disp_per_cyl and engine_index, where
# engine_index is the sum of the two ratios.
make_engine_features <- function(data) {
  cylinders <- data$cyl
  power_ratio <- data$hp / cylinders
  displacement_ratio <- data$disp / cylinders

  data.frame(
    hp_per_cyl = power_ratio,
    disp_per_cyl = displacement_ratio,
    engine_index = power_ratio + displacement_ratio
  )
}

# Pass an ordinary function (make_engine_features) to fd_block() instead of
# a braced body.
defs_function_block <- fd_define(
  engine_features = fd_block(make_engine_features)
)

# Run the function-backed block against raw_cars and preview the result.
features_function_block <- fd_compute(
  raw_cars,
  defs_function_block,
  key = "car_id"
)

head(features_function_block)

## -----------------------------------------------------------------------------
# Standardise the hp, disp and wt columns of a data frame.
#
# data: a data frame with numeric hp, disp and wt columns.
# Returns a data frame with hp_scaled, disp_scaled and wt_scaled, each column
# centred on its mean and divided by its standard deviation (NAs are ignored
# when computing both).
make_scaled_features <- function(data) {
  columns <- c("hp", "disp", "wt")

  # One output slot per input column, named up front.
  scaled <- vector("list", length(columns))
  names(scaled) <- paste0(columns, "_scaled")

  for (i in seq_along(columns)) {
    values <- data[[columns[i]]]
    centred <- values - mean(values, na.rm = TRUE)
    scaled[[i]] <- centred / stats::sd(values, na.rm = TRUE)
  }

  as.data.frame(scaled)
}

# Wrap the loop-based helper (make_scaled_features) in a block definition.
defs_loop_block <- fd_define(
  scaled_inputs = fd_block(make_scaled_features)
)

# Run the block against raw_cars and preview the result.
features_loop_block <- fd_compute(raw_cars, defs_loop_block, key = "car_id")

head(features_loop_block)

## -----------------------------------------------------------------------------
# Mix single-expression definitions with function-backed blocks in one
# fd_define() call. engine_per_weight refers to engine_index, a column
# returned by make_engine_features().
# In mtcars, am is coded 0 = automatic and 1 = manual, so am == 1 must be
# labelled "manual".
defs_combined <- fd_define(
  transmission = ifelse(am == 1, "manual", "automatic"),
  engine_features = fd_block(make_engine_features),
  scaled_inputs = fd_block(make_scaled_features),
  engine_per_weight = engine_index / wt
)

# Run the combined definitions against raw_cars with car_id as the key column.
features_combined <- fd_compute(
  data = raw_cars,
  defs = defs_combined,
  key = "car_id"
)

# Preview the first rows of the result.
head(features_combined)

## -----------------------------------------------------------------------------
# A block that declares its output columns through expected_names.
# NOTE(review): expected_names lists "high_disp", which the block body does
# not create — presumably this shows how a declared-but-missing column is
# handled; confirm against the fd_block documentation.
defs_expected <- fd_define(
  optional_engine_flags = fd_block(
    {
      data.frame(
        high_hp = hp > 150
      )
    },
    expected_names = c("high_hp", "high_disp")
  )
)

# Run the block against raw_cars and preview the result.
features_expected <- fd_compute(raw_cars, defs_expected, key = "car_id")

head(features_expected)

## ----eval = FALSE-------------------------------------------------------------
# # Local computation while developing feature logic
# fd_compute(raw_data, defs, key = "id")
# 
# # Full database pipeline once the definitions are ready
# fd_run(con, sql, defs, key = "id", feat_table_name = "feature_table")

