File size: 1,838 Bytes
c62089d
 
 
7592386
c62089d
 
 
7592386
c62089d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

import numpy as np
from sklearn.linear_model import LogisticRegression
from  features.build_features import SplitDataset
import streamlit as st
import pandas as pd

from  visualization.graphs_logistic import plot_logistic_coeff_barh


@st.cache(suppress_st_warning=True)
def create_clf_logistic_model(X_train, y_train):
    # Create and fit the logistic regression model
    return LogisticRegression(solver="lbfgs").fit(X_train, np.ravel(y_train))


@st.cache(suppress_st_warning=True)
def create_coeff_dict_logistic_model(
    logistic_model, training_data
):
    return {
        feat: coef
        for coef, feat in zip(
            logistic_model.coef_[0, :], training_data.columns
        )
    }


def coeff_dict_to_sorted_df(coef_dict):
    coef_dict_sorted = dict(
        sorted(coef_dict.items(), key=lambda item: item[1], reverse=False)
    )

    data_items = coef_dict_sorted.items()
    data_list = list(data_items)

    return pd.DataFrame(data_list, columns=["Coefficient", "Value"])


def interpret_clf_logistic_model(clf_logistic_model, split_dataset):
    st.metric(
        label="# of Coefficients in Logistic Regression",
        value=clf_logistic_model.n_features_in_,
        delta=None,
        delta_color="normal",
    )

    st.subheader("Logistic Regression Coefficient Values")

    coef_dict = create_coeff_dict_logistic_model(
        clf_logistic_model, split_dataset.X_y_train)

    df = coeff_dict_to_sorted_df(coef_dict)

    fig = plot_logistic_coeff_barh(df)

    st.plotly_chart(fig)


def logistic_train_model(split_dataset: SplitDataset):
    st.header("Logistic Regression Model")

    clf_logistic_model = create_clf_logistic_model(
        split_dataset.X_train, split_dataset.y_train
    )

    interpret_clf_logistic_model(clf_logistic_model, split_dataset)

    return clf_logistic_model