credit_risk_modeling_demo / src /models /util_predict_model.py
pkiage's picture
refractor: place all code in src
7592386
raw
history blame
3.27 kB
from typing import Union, cast
from sklearn.linear_model import LogisticRegression
import pandas as pd
from dataclasses import dataclass
from xgboost import XGBClassifier
from features.util_build_features import SplitDataset
from models.util_predict_model_threshold import (
user_defined_probability_threshold,
J_statistic_driven_probability_threshold,
tradeoff_threshold,
acceptance_rate_driven_threshold,
select_probability_threshold,
model_probability_values_df)
import streamlit as st
def probability_threshold_explainer(model_name):
    """Display a short explanation of how the probability threshold is used.

    Passes a three-sentence text block to ``st.write`` stating that the named
    model is applied to the testing data and that a loan is predicted to
    default when its predicted probability of defaulting exceeds the
    probability threshold.

    Args:
        model_name: Model name interpolated into the first sentence.
    """
    # The literal's lines are kept flush-left: any indentation placed inside
    # the triple quotes would become part of the text handed to st.write.
    explanation = f"""
The {model_name} model (obtained using training data) is applied on testing data to predict the loans probabilities of defaulting.\n
Probabilities of defaulting of the loans are compared to a probability threshold.\n
A loan is predicted to default if its predicted probability of defaulting is greater than the probability threshold.
"""
    st.write(explanation)
@dataclass(frozen=True)
class Threshold:
    """Immutable bundle of the outputs of a prediction view.

    The dataclass is frozen, so its fields cannot be reassigned once an
    instance has been constructed.

    Attributes:
        probability_threshold_selected: Probability threshold that was
            selected for the model.
        predicted_default_status: Predicted default status that goes with the
            selected threshold.
        prediction_probability_df: Probability values produced for the model.
    """

    # Selected probability threshold.
    probability_threshold_selected: float
    # Predicted default status paired with the selected threshold.
    predicted_default_status: pd.Series
    # Probability values the thresholds were derived from.
    prediction_probability_df: pd.DataFrame
def make_prediction_view(
    model_name_short: str,
    model_name: str,
):
    """Build a prediction view bound to one model's names.

    Args:
        model_name_short: Short model label; forwarded to the user-defined,
            acceptance-rate and final threshold-selection helpers.
        model_name: Model name shown in the explanatory text.

    Returns:
        A ``view(clf_xgbt_model, split_dataset)`` callable that runs the
        threshold helpers and returns a ``Threshold``.
    """

    def view(
        clf_xgbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
    ) -> Threshold:
        """Run each threshold helper for one model and collect the outcome.

        Args:
            clf_xgbt_model: Classifier handed to the probability and
                threshold helpers (XGBoost or logistic regression).
            split_dataset: Dataset split; its ``X_test`` is used to obtain
                the probability values.

        Returns:
            A ``Threshold`` holding the selected threshold, the matching
            predicted default status and the probability values.
        """
        # NOTE(review): the helpers below presumably render Streamlit
        # output, so their call order is kept exactly as written - confirm
        # before reordering.
        probability_threshold_explainer(model_name)

        probability_df = model_probability_values_df(
            clf_xgbt_model,
            split_dataset.X_test,
        )

        # Candidate 1: threshold supplied by the user.
        status_user, user_threshold = user_defined_probability_threshold(
            model_name_short, clf_xgbt_model, split_dataset
        )

        # Candidate 2: threshold driven by the J statistic.
        status_j_statistic, j_statistic_threshold = (
            J_statistic_driven_probability_threshold(
                probability_df, clf_xgbt_model, split_dataset
            )
        )

        # Called for its effects only; whatever it returns is not used here.
        tradeoff_threshold(probability_df, split_dataset)

        # Candidate 3: threshold driven by the acceptance rate.  Note the
        # helper returns (threshold, status), the reverse of the two above.
        acceptance_threshold, status_acceptance = (
            acceptance_rate_driven_threshold(model_name_short, probability_df)
        )

        # Final choice among the three candidates.
        chosen_threshold, chosen_status = select_probability_threshold(
            model_name_short,
            user_threshold,
            status_user,
            j_statistic_threshold,
            status_j_statistic,
            acceptance_threshold,
            status_acceptance,
        )

        # cast() only informs the type checker; the value is passed through.
        threshold_value = cast(float, chosen_threshold)
        return Threshold(
            probability_threshold_selected=threshold_value,
            predicted_default_status=chosen_status,
            prediction_probability_df=probability_df,
        )

    return view