File size: 3,274 Bytes
7f0977b
 
 
 
 
 
 
 
 
7592386
7f0977b
7592386
7f0977b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from typing import Union, cast
from sklearn.linear_model import LogisticRegression


import pandas as pd

from dataclasses import dataclass

from xgboost import XGBClassifier
from  features.util_build_features import SplitDataset

from  models.util_predict_model_threshold import (
    user_defined_probability_threshold,
    J_statistic_driven_probability_threshold,
    tradeoff_threshold,
    acceptance_rate_driven_threshold,
    select_probability_threshold,
    model_probability_values_df)

import streamlit as st


def probability_threshold_explainer(model_name):
    """Write a short explanation of probability thresholding to the page.

    Args:
        model_name: Display name of the model, interpolated into the text.
    """
    # The literal's leading whitespace is part of the emitted text; keep it.
    explanation = f"""
            The {model_name} model (obtained using training data) is applied on testing data to predict the loans probabilities of defaulting.\n
            Probabilities of defaulting of the loans are compared to a probability threshold.\n
            A loan is predicted to default if its predicted probability of defaulting is greater than the probability threshold.
            """
    st.write(explanation)


@dataclass(frozen=True)
class Threshold:
    """Result of a probability-threshold selection for one model.

    The dataclass is frozen, so its attributes cannot be reassigned after
    construction. Instances are created by the view function returned from
    ``make_prediction_view``.
    """

    # Threshold value returned by ``select_probability_threshold``.
    probability_threshold_selected: float
    # Default-status predictions returned alongside the selected threshold
    # (presumably one entry per test row -- confirm against the helper).
    predicted_default_status: pd.Series
    # Output of ``model_probability_values_df`` for the test features.
    prediction_probability_df: pd.DataFrame


def make_prediction_view(
    model_name_short: str,
    model_name: str,
):
    """Create a prediction view bound to the given model names.

    Args:
        model_name_short: Short model identifier forwarded to the
            threshold helpers.
        model_name: Display name used in the explanatory text.

    Returns:
        A ``view(clf_xgbt_model, split_dataset)`` callable that returns a
        ``Threshold``.
    """

    def view(
        clf_xgbt_model: Union[XGBClassifier, LogisticRegression],
        split_dataset: SplitDataset,
    ) -> Threshold:
        """Run every threshold step in order and return the selection.

        Args:
            clf_xgbt_model: Fitted classifier (XGBoost or logistic
                regression) handed to the probability/threshold helpers.
            split_dataset: Train/test split; ``X_test`` is used to compute
                the probability table.

        Returns:
            The selected threshold, the matching predicted default status,
            and the probability table.
        """
        # The helpers are called in a fixed sequence; keep this order, as
        # they presumably render page elements top to bottom.
        probability_threshold_explainer(model_name)

        probabilities_df = model_probability_values_df(
            clf_xgbt_model, split_dataset.X_test
        )

        # Candidate 1: threshold supplied by the user.
        user_status, user_threshold = user_defined_probability_threshold(
            model_name_short, clf_xgbt_model, split_dataset
        )

        # Candidate 2: threshold derived from the J statistic.
        j_status, j_threshold = J_statistic_driven_probability_threshold(
            probabilities_df, clf_xgbt_model, split_dataset
        )

        # Called for its side effects only; its result is not used.
        tradeoff_threshold(probabilities_df, split_dataset)

        # Candidate 3: threshold derived from an acceptance rate.
        # Note the (threshold, status) order, unlike the two helpers above.
        acceptance_threshold, acceptance_status = acceptance_rate_driven_threshold(
            model_name_short, probabilities_df
        )

        selected_threshold, selected_status = select_probability_threshold(
            model_name_short,
            user_threshold,
            user_status,
            j_threshold,
            j_status,
            acceptance_threshold,
            acceptance_status,
        )

        return Threshold(
            probability_threshold_selected=cast(float, selected_threshold),
            predicted_default_status=selected_status,
            prediction_probability_df=probabilities_df,
        )

    return view