# Source: Hugging Face Space pkiage/credit_risk_modeling_demo
# Space status when captured: Build error
# File size: 3,274 Bytes


from typing import Union, cast
from sklearn.linear_model import LogisticRegression


import pandas as pd

from dataclasses import dataclass

from xgboost import XGBClassifier
from  features.util_build_features import SplitDataset

from  models.util_predict_model_threshold import (
    user_defined_probability_threshold,
    J_statistic_driven_probability_threshold,
    tradeoff_threshold,
    acceptance_rate_driven_threshold,
    select_probability_threshold,
    model_probability_values_df)

import streamlit as st


def probability_threshold_explainer(model_name):
    """Write a short explanation of how the probability threshold is applied.

    The text is emitted through ``st.write`` and mentions ``model_name`` so
    the reader knows which model's predictions are being thresholded.

    Args:
        model_name: Display name of the model, interpolated into the text.
    """
    # The literal is kept exactly as authored (indentation and "\n" escapes
    # included) because it is passed verbatim to Streamlit.
    explanation = f"""
            The {model_name} model (obtained using training data) is applied on testing data to predict the loans probabilities of defaulting.\n
            Probabilities of defaulting of the loans are compared to a probability threshold.\n
            A loan is predicted to default if its predicted probability of defaulting is greater than the probability threshold.
            """
    st.write(explanation)


@dataclass(frozen=True)
class Threshold:
    """Immutable result bundle returned by a prediction view.

    Groups the probability threshold that was selected with the predictions
    and probability table that go with it. Instances are frozen, so fields
    cannot be reassigned after construction.
    """

    # Probability cut-off that was ultimately selected.
    probability_threshold_selected: float
    # Predicted default status associated with the selected threshold
    # (presumably one entry per test-set loan -- confirm against the helper).
    predicted_default_status: pd.Series
    # Table of predicted probabilities produced by the model on the test data.
    prediction_probability_df: pd.DataFrame


def make_prediction_view(
    model_name_short: str,
    model_name: str,
):
    """Create a prediction view bound to one model's display names.

    Args:
        model_name_short: Short model label forwarded to the threshold
            helpers (presumably used to label/key widgets -- confirm).
        model_name: Full model name shown in the explanatory text.

    Returns:
        A ``view(clf_xgbt_model, split_dataset)`` callable that walks through
        the threshold-selection helpers and returns a ``Threshold``.
    """

    def view(
        clf_xgbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
    ) -> Threshold:
        """Run the threshold-selection steps for a fitted classifier.

        The helpers are invoked in a fixed order; keep that order, since they
        likely render UI elements as a side effect (not visible from here).
        """
        probability_threshold_explainer(model_name)

        # Predicted probabilities on the held-out test features.
        probabilities_df = model_probability_values_df(
            clf_xgbt_model, split_dataset.X_test
        )

        # Candidate 1: threshold supplied by the user.
        status_user, threshold_user = user_defined_probability_threshold(
            model_name_short, clf_xgbt_model, split_dataset
        )

        # Candidate 2: threshold derived from the J statistic.
        status_j_statistic, threshold_j_statistic = (
            J_statistic_driven_probability_threshold(
                probabilities_df, clf_xgbt_model, split_dataset
            )
        )

        # Called for its effect only; its return value is not used.
        tradeoff_threshold(probabilities_df, split_dataset)

        # Candidate 3: threshold derived from an acceptance rate.
        # NOTE: unlike the two helpers above, this result is unpacked
        # threshold-first, status-second.
        threshold_acceptance, status_acceptance = acceptance_rate_driven_threshold(
            model_name_short, probabilities_df
        )

        # Pick one of the three candidates as the final threshold.
        selected_threshold, selected_status = select_probability_threshold(
            model_name_short,
            threshold_user,
            status_user,
            threshold_j_statistic,
            status_j_statistic,
            threshold_acceptance,
            status_acceptance,
        )

        return Threshold(
            # cast() is a type-checker hint only; no runtime conversion occurs.
            probability_threshold_selected=cast(float, selected_threshold),
            predicted_default_status=selected_status,
            prediction_probability_df=probabilities_df,
        )

    return view