library(tidyverse)
library(tidymodels)28 Threshold selection with tuning
In this example, we will
- load and preprocess data
- define a workflow including a tunable threshold
- tune the threshold using cross-validation and the F-measure
- train the final model
- evaluate the model using cross-validation and holdout data
using the Loan prediction dataset to illustrate the whole process.
Load the required packages:
Load and preprocess the data:
# URL of the loan prediction dataset on the book's website
file <- paste0(
"https://gedeck.github.io/machine-learning-with-tidymodels/",
"datasets/loan_prediction.csv"
)
# Read the CSV, drop incomplete rows, and coerce columns to the types
# needed for modeling
data <- read_csv(file, show_col_types = FALSE) |>
drop_na() |>
mutate(
Gender = as.factor(Gender),
Married = as.factor(Married),
# "3+" becomes 3 before the numeric conversion
Dependents = gsub("\\+", "", Dependents) |> as.numeric(),
Education = as.factor(Education),
Self_Employed = as.factor(Self_Employed),
Credit_History = as.factor(Credit_History),
Property_Area = as.factor(Property_Area),
# "Yes" listed first so it is the event level for yardstick metrics
Loan_Status = factor(Loan_Status, levels = c("Y", "N"),
labels = c("Yes", "No"))
) |>
select(-Loan_ID)

Split dataset into training and holdout data, prepare for cross-validation:
# Fix the random seed for reproducible splits
set.seed(123)
# 80/20 train/holdout split, stratified by the outcome
data_split <- initial_split(data, prop = 0.8, strata = Loan_Status)
train_data <- training(data_split)
holdout_data <- testing(data_split)
# 10-fold cross-validation folds from the training data
resamples <- vfold_cv(train_data, v = 10, strata = Loan_Status)
# Metrics computed during resampling
cv_metrics <- metric_set(f_meas, roc_auc, accuracy)
cv_control <- control_resamples(save_pred = TRUE)

Define the recipe, the model specification (logistic regression with the glm engine), and combine them into a workflow:
# Model formula: predict loan approval from all remaining predictors
formula <- Loan_Status ~ Gender + Married + Dependents + Education +
Self_Employed + ApplicantIncome + CoapplicantIncome + LoanAmount +
Loan_Amount_Term + Credit_History + Property_Area
# Recipe: dummy-encode the nominal predictors (outcome excluded)
recipe_spec <- recipe(formula, data = train_data) |>
step_dummy(all_nominal(), -all_outcomes())
# Logistic regression fit with the glm engine
model_spec <- logistic_reg(engine = "glm", mode = "classification")
# Post-processor carrying a tunable classification threshold
tailor_spec <- tailor() |>
adjust_probability_threshold(threshold = tune())
# Combine preprocessing, model, and post-processing in one workflow
wf <- workflow() |>
add_model(model_spec) |>
add_recipe(recipe_spec) |>
add_tailor(tailor_spec)

Tune the workflow:
# Restrict the threshold search range to [0.1, 0.9]
parameters <- extract_parameter_set_dials(wf) |>
update(threshold = threshold(range = c(0.1, 0.9)))
# Bayesian optimization of the threshold over the CV folds
tune_result <- tune_bayes(wf, resamples = resamples,
metrics = cv_metrics,
param_info = parameters, iter = 25)

Figure 28.1 shows the tuning results. We can see that the F-measure is maximized at a threshold of around 0.38. Accuracy is practically unchanged for thresholds between 0.38 and 0.7. ROC-AUC is threshold-independent and therefore unchanged.
# Plot metric profiles across the tuning iterations (Figure 28.1)
autoplot(tune_result)
The “optimal” threshold is:
# Threshold that maximizes the cross-validated F-measure
best_threshold <- tune_result |>
select_best(metric = "f_meas")
best_threshold

# A tibble: 1 × 2
threshold .config
<dbl> <chr>
1 0.464 iter01
We finalize the model and train it using the full training set:
# Plug the selected threshold into the workflow
final_wf <- finalize_workflow(wf, best_threshold)
# Re-estimate performance of the finalized workflow by cross-validation
result_cv <- fit_resamples(final_wf, resamples, metrics = cv_metrics,
control = cv_control)
fitted_model <- final_wf |> fit(train_data)

Estimate model performance using the cross-validation results and the holdout data:
# Average the cross-validated metrics, one row per metric
cv_results <- collect_metrics(result_cv) |>
select(.metric, mean) |>
rename(.estimate = mean) |>
mutate(result = "Cross-validation", threshold = best_threshold$threshold)
# Class predictions and probabilities on the holdout set
holdout_predictions <- augment(fitted_model, new_data = holdout_data)
# Same three metrics on the holdout data; "Yes" is the first factor
# level, so event_level = "first" scores loan approval as the event
holdout_results <- bind_rows(
c(roc_auc(holdout_predictions, Loan_Status, .pred_Yes,
event_level = "first")),
c(f_meas(holdout_predictions, Loan_Status, .pred_class)),
c(accuracy(holdout_predictions, Loan_Status, .pred_class))) |>
select(-.estimator) |>
mutate(result = "Holdout", threshold = best_threshold$threshold)

The performance metrics are summarized in the following table.
# Combine CV and holdout results into one comparison table
bind_rows(
cv_results,
holdout_results,
) |>
# one column per metric
pivot_wider(names_from = .metric, values_from = .estimate) |>
kableExtra::kbl(digits = 3) |>
kableExtra::kable_styling(full_width = FALSE)

| result | threshold | accuracy | f_meas | roc_auc |
|---|---|---|---|---|
| Cross-validation | 0.464 | 0.802 | 0.872 | 0.752 |
| Holdout | 0.464 | 0.835 | 0.893 | 0.733 |
We can see that tuning the threshold below the default of 0.5 leads to a higher F-measure, while accuracy is essentially unchanged in this threshold range.
Code
The code of this chapter is summarized here.
Show the code
# knitr chunk options: show code, cache results, track chunk dependencies
knitr::opts_chunk$set(echo = TRUE, cache = TRUE, autodep = TRUE,
fig.align = "center")
library(tidyverse)
library(tidymodels)
# Location of the loan prediction dataset on the book's website
file <- paste0(
  "https://gedeck.github.io/machine-learning-with-tidymodels/",
  "datasets/loan_prediction.csv"
)
# Read the raw data, drop incomplete rows, and coerce columns to the
# types needed for modeling
data <- read_csv(file, show_col_types = FALSE) |>
  drop_na() |>
  mutate(
    # categorical predictors stored as character strings / 0-1 codes
    across(c(Gender, Married, Education, Self_Employed,
             Credit_History, Property_Area), as.factor),
    # "3+" is recoded to 3 before converting to numeric
    Dependents = as.numeric(gsub("\\+", "", Dependents)),
    # "Yes" listed first so it is the event level for yardstick metrics
    Loan_Status = factor(Loan_Status, levels = c("Y", "N"),
                         labels = c("Yes", "No"))
  ) |>
  select(-Loan_ID)
# Reproducible 80/20 train/holdout split, stratified by the outcome
set.seed(123)
data_split <- initial_split(data, prop = 0.8, strata = Loan_Status)
train_data <- training(data_split)
holdout_data <- testing(data_split)
# 10-fold cross-validation folds from the training data
resamples <- vfold_cv(train_data, v = 10, strata = Loan_Status)
# Metrics computed during resampling; keep fold predictions for later use
cv_metrics <- metric_set(f_meas, roc_auc, accuracy)
cv_control <- control_resamples(save_pred = TRUE)
# Model formula: predict loan approval from all remaining predictors
formula <- Loan_Status ~ Gender + Married + Dependents + Education +
  Self_Employed + ApplicantIncome + CoapplicantIncome + LoanAmount +
  Loan_Amount_Term + Credit_History + Property_Area

# Recipe: dummy-encode the nominal predictors. all_nominal_predictors()
# is the modern recipes selector equivalent to all_nominal(), -all_outcomes()
recipe_spec <- recipe(formula, data = train_data) |>
  step_dummy(all_nominal_predictors())

# Logistic regression fit with the (unpenalized) glm engine
model_spec <- logistic_reg(engine = "glm", mode = "classification")

# Post-processor carrying a tunable classification threshold
tailor_spec <- tailor() |>
  adjust_probability_threshold(threshold = tune())

# Combine preprocessing, model, and post-processing in one workflow
wf <- workflow() |>
  add_model(model_spec) |>
  add_recipe(recipe_spec) |>
  add_tailor(tailor_spec)
# Restrict the threshold search range to [0.1, 0.9]
parameters <- extract_parameter_set_dials(wf) |>
update(threshold = threshold(range = c(0.1, 0.9)))
# Bayesian optimization of the threshold over the CV folds, scored with
# the metrics in cv_metrics
tune_result <- tune_bayes(wf, resamples = resamples,
metrics = cv_metrics,
param_info = parameters, iter = 25)
# Visualize metric profiles from the tuning run
autoplot(tune_result)
# Threshold with the best cross-validated F-measure
best_threshold <- tune_result |>
select_best(metric = "f_meas")
best_threshold
# Fix the selected threshold in the workflow and assess it by CV
final_wf <- finalize_workflow(wf, best_threshold)
result_cv <- fit_resamples(final_wf, resamples, metrics = cv_metrics,
control = cv_control)
# Final model trained on the full training set
fitted_model <- final_wf |> fit(train_data)
# Cross-validation estimates: one row per metric, labeled with the
# tuned threshold
cv_results <- collect_metrics(result_cv) |>
  select(.metric, mean) |>
  rename(.estimate = mean) |>
  mutate(result = "Cross-validation", threshold = best_threshold$threshold)

# Holdout estimates: predict once, then score with the same metrics.
# yardstick metric functions already return tibbles, so they can be
# passed to bind_rows() directly (no c() wrapper needed). "Yes" is the
# first factor level, so event_level = "first" scores loan approval as
# the event of interest.
holdout_predictions <- augment(fitted_model, new_data = holdout_data)
holdout_results <- bind_rows(
  roc_auc(holdout_predictions, Loan_Status, .pred_Yes,
          event_level = "first"),
  f_meas(holdout_predictions, Loan_Status, .pred_class),
  accuracy(holdout_predictions, Loan_Status, .pred_class)
) |>
  select(-.estimator) |>
  mutate(result = "Holdout", threshold = best_threshold$threshold)
# Side-by-side comparison of cross-validation and holdout performance,
# reshaped to one column per metric and rendered as a styled table
performance <- bind_rows(cv_results, holdout_results)
performance |>
  pivot_wider(names_from = .metric, values_from = .estimate) |>
  kableExtra::kbl(digits = 3) |>
  kableExtra::kable_styling(full_width = FALSE)