Spaces:
Build error
Build error
refactor: logistic model
Browse files- app.py +9 -1
- src/models/logistic_model.py +33 -0
- src/models/logistic_predict_model.py +4 -0
- src/models/logistic_test_model.py +4 -0
- src/models/logistic_train_model.py +71 -0
- src/visualization/graphs_logistic.py +12 -0
app.py
CHANGED
@@ -3,14 +3,18 @@ from typing import OrderedDict
|
|
3 |
|
4 |
|
5 |
from src.features.build_features import initialise_data
|
|
|
6 |
from src.models.xgboost_model import xgboost_class
|
|
|
|
|
|
|
7 |
from src.models.util_strategy_table import strategy_table_view
|
8 |
|
9 |
|
10 |
def main():
|
11 |
currency_options = ["USD", "KES", "GBP"]
|
12 |
|
13 |
-
model_options = ["XGBoost"]
|
14 |
|
15 |
currency = st.sidebar.selectbox(
|
16 |
label="What currency will you be using?", options=currency_options
|
@@ -32,6 +36,10 @@ def main():
|
|
32 |
|
33 |
model_classes = OrderedDict()
|
34 |
|
|
|
|
|
|
|
|
|
35 |
if "XGBoost" in models_selected_set:
|
36 |
xgboost_model_class = xgboost_class(split_dataset, currency)
|
37 |
model_classes["XGBoost"] = xgboost_model_class
|
|
|
3 |
|
4 |
|
5 |
from src.features.build_features import initialise_data
|
6 |
+
|
7 |
from src.models.xgboost_model import xgboost_class
|
8 |
+
from src.models.logistic_model import logistic_class
|
9 |
+
|
10 |
+
|
11 |
from src.models.util_strategy_table import strategy_table_view
|
12 |
|
13 |
|
14 |
def main():
|
15 |
currency_options = ["USD", "KES", "GBP"]
|
16 |
|
17 |
+
model_options = ["XGBoost", "Logistic"]
|
18 |
|
19 |
currency = st.sidebar.selectbox(
|
20 |
label="What currency will you be using?", options=currency_options
|
|
|
36 |
|
37 |
model_classes = OrderedDict()
|
38 |
|
39 |
+
if "Logistic" in models_selected_set:
|
40 |
+
logistic_model_class = logistic_class(split_dataset, currency)
|
41 |
+
model_classes["Logistic"] = logistic_model_class
|
42 |
+
|
43 |
if "XGBoost" in models_selected_set:
|
44 |
xgboost_model_class = xgboost_class(split_dataset, currency)
|
45 |
model_classes["XGBoost"] = xgboost_model_class
|
src/models/logistic_model.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.features.build_features import SplitDataset
|
2 |
+
|
3 |
+
from src.models.logistic_train_model import logistic_train_model
|
4 |
+
from src.models.logistic_predict_model import logistic_predict_model
|
5 |
+
from src.models.logistic_test_model import logistic_test_model
|
6 |
+
|
7 |
+
from src.models.util_model_class import ModelClass
|
8 |
+
|
9 |
+
|
10 |
+
def logistic_class(split_dataset: SplitDataset, currency: str) -> ModelClass:
    """Train, predict with and evaluate the logistic model as one ModelClass.

    Args:
        split_dataset: Dataset split handed to the train, predict and test
            steps.
        currency: Currency string forwarded to the test step.

    Returns:
        A ModelClass holding the fitted model, the frame returned by the test
        step, and the threshold, predicted statuses and probability frame
        taken from the prediction step.
    """
    # Step 1: fit the classifier.
    model = logistic_train_model(split_dataset)

    # Step 2: run the prediction step with the fitted classifier.
    predictions = logistic_predict_model(model, split_dataset)
    threshold = predictions.probability_threshold_selected
    predicted_status = predictions.predicted_default_status

    # Step 3: evaluate using the selected threshold and predicted statuses.
    evaluation_df = logistic_test_model(
        model,
        split_dataset,
        currency,
        threshold,
        predicted_status,
    )

    return ModelClass(
        model=model,
        trueStatus_probabilityDefault_threshStatus_loanAmount_df=evaluation_df,
        probability_threshold_selected=threshold,
        predicted_default_status=predicted_status,
        prediction_probability_df=predictions.prediction_probability_df,
    )
|
src/models/logistic_predict_model.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.models.util_predict_model import make_prediction_view
|
2 |
+
|
3 |
+
# Prediction view for the logistic model, produced by make_prediction_view.
# The label is spelled "Logistic Model" (not "Logisitic Model") so it agrees
# with the label given to the logistic test view.
logistic_predict_model = make_prediction_view("Logistic", "Logistic Model")
|
src/models/logistic_test_model.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.models.util_test import make_tests_view
|
2 |
+
|
3 |
+
# Evaluation view for the logistic model, produced by make_tests_view.
logistic_test_model = make_tests_view("Logistic", "Logistic Model")
|
src/models/logistic_train_model.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
from sklearn.linear_model import LogisticRegression
|
4 |
+
from src.features.build_features import SplitDataset
|
5 |
+
import streamlit as st
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
from src.visualization.graphs_logistic import plot_logistic_coeff_barh
|
9 |
+
|
10 |
+
|
11 |
+
@st.cache(suppress_st_warning=True)
def create_clf_logistic_model(X_train, y_train):
    """Fit a logistic regression classifier (lbfgs solver) on training data.

    Args:
        X_train: Training features passed to ``LogisticRegression.fit``.
        y_train: Training targets; flattened with ``np.ravel`` before fitting.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    flat_targets = np.ravel(y_train)
    classifier = LogisticRegression(solver="lbfgs")
    # fit() returns the estimator itself, so returning ``classifier`` after
    # fitting is equivalent to returning the result of fit().
    classifier.fit(X_train, flat_targets)
    return classifier
|
15 |
+
|
16 |
+
|
17 |
+
@st.cache(suppress_st_warning=True)
def create_coeff_dict_logistic_model(logistic_model, training_data):
    """Map each column name of ``training_data`` to a model coefficient.

    The first row of ``logistic_model.coef_`` is paired positionally with
    ``training_data.columns``; pairing stops at the shorter of the two.

    Args:
        logistic_model: Fitted model exposing a 2-D ``coef_`` array.
        training_data: Object with a ``columns`` attribute naming the features.

    Returns:
        A dict of ``{column name: coefficient}``.
    """
    first_row_coefficients = logistic_model.coef_[0, :]
    feature_names = training_data.columns
    return dict(zip(feature_names, first_row_coefficients))
|
27 |
+
|
28 |
+
|
29 |
+
def coeff_dict_to_sorted_df(coef_dict):
    """Turn a ``{name: value}`` mapping into a frame sorted by ascending value.

    Args:
        coef_dict: Mapping of coefficient name to coefficient value.

    Returns:
        A DataFrame with a "Coefficient" column (the keys) and a "Value"
        column (the values), one row per entry, smallest value first.
    """
    # sorted() is stable, so entries with equal values keep their dict order.
    ascending_pairs = sorted(coef_dict.items(), key=lambda pair: pair[1])
    return pd.DataFrame(ascending_pairs, columns=["Coefficient", "Value"])
|
38 |
+
|
39 |
+
|
40 |
+
def interpret_clf_logistic_model(clf_logistic_model, split_dataset):
    """Render the coefficient count and a coefficient bar chart in Streamlit.

    Args:
        clf_logistic_model: Fitted model exposing ``n_features_in_`` and
            ``coef_``.
        split_dataset: Dataset split whose ``X_train`` supplies the feature
            names for the coefficients.
    """
    st.metric(
        label="# of Coefficients in Logistic Regression",
        value=clf_logistic_model.n_features_in_,
        delta=None,
        delta_color="normal",
    )

    st.subheader("Logistic Regression Coefficient Values")

    # Feature names are read from X_train, the same data the model is fitted
    # on in logistic_train_model, so each coefficient is paired with its own
    # column rather than relying on zip() truncating a wider frame.
    # NOTE(review): assumes X_train is a DataFrame (has .columns) - confirm.
    # The raw dumps of split_dataset and its type were debugging output and
    # are no longer written to the page.
    coef_dict = create_coeff_dict_logistic_model(
        clf_logistic_model, split_dataset.X_train
    )

    sorted_coefficients = coeff_dict_to_sorted_df(coef_dict)
    fig = plot_logistic_coeff_barh(sorted_coefficients)
    st.plotly_chart(fig)
|
60 |
+
|
61 |
+
|
62 |
+
def logistic_train_model(split_dataset: SplitDataset):
    """Fit the logistic regression model and render its interpretation.

    Writes a section header, fits the classifier on ``split_dataset.X_train``
    and ``split_dataset.y_train``, then renders the coefficient summary.

    Args:
        split_dataset: Dataset split providing the training features/targets.

    Returns:
        The fitted classifier returned by ``create_clf_logistic_model``.
    """
    st.header("Logistic Regression Model")

    features, targets = split_dataset.X_train, split_dataset.y_train
    fitted_model = create_clf_logistic_model(features, targets)

    interpret_clf_logistic_model(fitted_model, split_dataset)
    return fitted_model
|
src/visualization/graphs_logistic.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import plotly.express as px
|
2 |
+
|
3 |
+
|
4 |
+
def plot_logistic_coeff_barh(df):
    """Build a horizontal bar chart of logistic regression coefficients.

    Args:
        df: Frame with a "Coefficient" column (bar labels, y axis) and a
            "Value" column (bar lengths, x axis).

    Returns:
        The Plotly figure with title and axis titles set.
    """
    layout_options = {
        "title": "Logistic Regression Coefficients",
        "xaxis_title": "Value",
        "yaxis_title": "Coefficient",
    }
    figure = px.bar(df, x="Value", y="Coefficient", orientation="h")
    figure.update_layout(**layout_options)
    return figure
|