pkiage committed on
Commit
c62089d
·
1 Parent(s): f50d894

refactor: logistic model

Browse files
app.py CHANGED
@@ -3,14 +3,18 @@ from typing import OrderedDict
3
 
4
 
5
  from src.features.build_features import initialise_data
 
6
  from src.models.xgboost_model import xgboost_class
 
 
 
7
  from src.models.util_strategy_table import strategy_table_view
8
 
9
 
10
  def main():
11
  currency_options = ["USD", "KES", "GBP"]
12
 
13
- model_options = ["XGBoost"]
14
 
15
  currency = st.sidebar.selectbox(
16
  label="What currency will you be using?", options=currency_options
@@ -32,6 +36,10 @@ def main():
32
 
33
  model_classes = OrderedDict()
34
 
 
 
 
 
35
  if "XGBoost" in models_selected_set:
36
  xgboost_model_class = xgboost_class(split_dataset, currency)
37
  model_classes["XGBoost"] = xgboost_model_class
 
3
 
4
 
5
  from src.features.build_features import initialise_data
6
+
7
  from src.models.xgboost_model import xgboost_class
8
+ from src.models.logistic_model import logistic_class
9
+
10
+
11
  from src.models.util_strategy_table import strategy_table_view
12
 
13
 
14
  def main():
15
  currency_options = ["USD", "KES", "GBP"]
16
 
17
+ model_options = ["XGBoost", "Logistic"]
18
 
19
  currency = st.sidebar.selectbox(
20
  label="What currency will you be using?", options=currency_options
 
36
 
37
  model_classes = OrderedDict()
38
 
39
+ if "Logistic" in models_selected_set:
40
+ logistic_model_class = logistic_class(split_dataset, currency)
41
+ model_classes["Logistic"] = logistic_model_class
42
+
43
  if "XGBoost" in models_selected_set:
44
  xgboost_model_class = xgboost_class(split_dataset, currency)
45
  model_classes["XGBoost"] = xgboost_model_class
src/models/logistic_model.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.features.build_features import SplitDataset
2
+
3
+ from src.models.logistic_train_model import logistic_train_model
4
+ from src.models.logistic_predict_model import logistic_predict_model
5
+ from src.models.logistic_test_model import logistic_test_model
6
+
7
+ from src.models.util_model_class import ModelClass
8
+
9
+
10
def logistic_class(split_dataset: SplitDataset, currency: str) -> ModelClass:
    """Run the logistic model pipeline and bundle the results in a ModelClass.

    The three stages are executed in order: train, predict, then
    test/evaluate. The selected probability threshold and the predicted
    default status produced by the prediction stage are fed into the
    evaluation stage and also stored on the returned ModelClass.

    Args:
        split_dataset: Dataset split handed to every stage.
        currency: Currency passed through to the test/evaluation stage.

    Returns:
        ModelClass holding the trained model, the evaluation dataframe,
        the selected threshold, the predicted default status and the
        prediction probability dataframe.
    """
    # Stage 1: train the model.
    model = logistic_train_model(split_dataset)

    # Stage 2: predict using the trained model.
    predictions = logistic_predict_model(model, split_dataset)
    threshold = predictions.probability_threshold_selected
    default_status = predictions.predicted_default_status

    # Stage 3: test and evaluate the model.
    evaluation_df = logistic_test_model(
        model,
        split_dataset,
        currency,
        threshold,
        default_status,
    )

    return ModelClass(
        model=model,
        trueStatus_probabilityDefault_threshStatus_loanAmount_df=evaluation_df,
        probability_threshold_selected=threshold,
        predicted_default_status=default_status,
        prediction_probability_df=predictions.prediction_probability_df,
    )
src/models/logistic_predict_model.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from src.models.util_predict_model import make_prediction_view
2
+
3
# Prediction view for the logistic model, built by the shared
# make_prediction_view factory. The second argument is spelled
# "Logistic Model" (previously misspelled) so it matches the name passed
# to the logistic test view.
logistic_predict_model = make_prediction_view("Logistic", "Logistic Model")
src/models/logistic_test_model.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from src.models.util_test import make_tests_view
2
+
3
# Test/evaluation view for the logistic model, built by the shared
# make_tests_view factory from the model's short and long names.
logistic_test_model = make_tests_view("Logistic", "Logistic Model")
src/models/logistic_train_model.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from sklearn.linear_model import LogisticRegression
4
+ from src.features.build_features import SplitDataset
5
+ import streamlit as st
6
+ import pandas as pd
7
+
8
+ from src.visualization.graphs_logistic import plot_logistic_coeff_barh
9
+
10
+
11
# NOTE(review): st.cache is deprecated in newer Streamlit releases —
# confirm the pinned Streamlit version before upgrading.
@st.cache(suppress_st_warning=True)
def create_clf_logistic_model(X_train, y_train):
    """Create a LogisticRegression (lbfgs solver) and fit it.

    Args:
        X_train: Training features passed straight to ``fit``.
        y_train: Training targets; flattened with ``np.ravel`` before
            fitting.

    Returns:
        The fitted LogisticRegression estimator.
    """
    classifier = LogisticRegression(solver="lbfgs")
    flat_targets = np.ravel(y_train)
    return classifier.fit(X_train, flat_targets)
15
+
16
+
17
@st.cache(suppress_st_warning=True)
def create_coeff_dict_logistic_model(logistic_model, training_data):
    """Pair each column name of ``training_data`` with a model coefficient.

    The first row of ``logistic_model.coef_`` is zipped positionally with
    ``training_data.columns``; zipping stops at the shorter of the two.

    Returns:
        dict mapping column name -> coefficient value.
    """
    coefficients = logistic_model.coef_[0, :]
    feature_names = training_data.columns
    return dict(zip(feature_names, coefficients))
27
+
28
+
29
def coeff_dict_to_sorted_df(coef_dict):
    """Turn a name -> coefficient mapping into a DataFrame sorted by value.

    Args:
        coef_dict: Mapping of coefficient (feature) name to its value.

    Returns:
        DataFrame with columns "Coefficient" (the name) and "Value",
        with rows in ascending order of value.
    """
    # sorted() is stable, so entries with equal values keep the
    # insertion order of the input mapping.
    ascending_rows = sorted(coef_dict.items(), key=lambda pair: pair[1])
    return pd.DataFrame(ascending_rows, columns=["Coefficient", "Value"])
38
+
39
+
40
def interpret_clf_logistic_model(clf_logistic_model, split_dataset):
    """Render the logistic model's coefficients in the Streamlit page.

    Shows a metric with the number of input features, a subheader, and a
    horizontal bar chart of the coefficient values sorted ascending.

    Args:
        clf_logistic_model: Fitted logistic regression model; its
            ``n_features_in_`` and ``coef_`` attributes are read.
        split_dataset: Dataset split; the column names of its ``X_train``
            are used to label the coefficients.
    """
    st.metric(
        label="# of Coefficients in Logistic Regression",
        value=clf_logistic_model.n_features_in_,
        delta=None,
        delta_color="normal",
    )

    st.subheader("Logistic Regression Coefficient Values")

    # Label coefficients with the columns of X_train, the frame the model
    # is fitted on, instead of X_y_train, so names and coefficients line up
    # position by position.
    # NOTE(review): assumes X_train exposes ``.columns`` — confirm.
    coef_dict = create_coeff_dict_logistic_model(
        clf_logistic_model, split_dataset.X_train
    )

    sorted_coef_df = coeff_dict_to_sorted_df(coef_dict)

    fig = plot_logistic_coeff_barh(sorted_coef_df)

    st.plotly_chart(fig)
60
+
61
+
62
def logistic_train_model(split_dataset: SplitDataset):
    """Fit the logistic model on the training split and display it.

    Writes a page header, fits the model on ``split_dataset.X_train`` /
    ``split_dataset.y_train``, renders the coefficient interpretation,
    and returns the fitted model.
    """
    st.header("Logistic Regression Model")

    features, targets = split_dataset.X_train, split_dataset.y_train
    fitted_model = create_clf_logistic_model(features, targets)

    interpret_clf_logistic_model(fitted_model, split_dataset)
    return fitted_model
src/visualization/graphs_logistic.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.express as px
2
+
3
+
4
def plot_logistic_coeff_barh(df):
    """Build a horizontal bar chart of logistic regression coefficients.

    Args:
        df: Frame with a "Value" column (plotted on x) and a
            "Coefficient" column (plotted on y).

    Returns:
        The figure, with title and axis titles set.
    """
    figure = px.bar(
        data_frame=df,
        x="Value",
        y="Coefficient",
        orientation="h",
    )

    layout_options = {
        "title": "Logistic Regression Coefficients",
        "xaxis_title": "Value",
        "yaxis_title": "Coefficient",
    }
    figure.update_layout(**layout_options)
    return figure