File size: 4,403 Bytes
9d94c83
 
27db1e5
9d94c83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdffe0b
9d94c83
 
 
 
 
 
cdffe0b
9d94c83
 
 
 
 
 
 
 
 
cdffe0b
9d94c83
cdffe0b
 
 
 
9d94c83
 
 
 
 
 
 
 
 
 
 
 
 
cdffe0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d94c83
 
cdffe0b
9d94c83
 
 
 
 
 
 
cdffe0b
9d94c83
cdffe0b
9d94c83
 
 
 
 
 
 
 
cdffe0b
 
 
9d94c83
 
27db1e5
 
cdffe0b
27db1e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d94c83
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import marimo

# Version string stored with the notebook (presumably the marimo release that
# generated this file -- confirm against marimo's file format docs).
__generated_with = "0.11.6"
# Notebook application object; every cell below registers itself on it through
# the @app.cell decorator.
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _(mo):
    # Notebook title, rendered as a top-level markdown heading.
    mo.md("# Diabetes Dataset Analysis")
    return


@app.cell(hide_code=True)
def _():
    # Shared imports for the notebook: the modules are returned so that cells
    # declaring `mo` / `pl` as parameters receive them.
    import marimo as mo
    import polars as pl
    return mo, pl


@app.cell(hide_code=True)
def _(mo):
    # Collapsible legend explaining how the binary columns of interest are
    # encoded. Corrected text: HighChol had the HighBP legend copy-pasted onto
    # it, and the Smoker entry was missing its closing bracket and left exactly
    # 100 cigarettes unassigned to either value.
    mo.accordion(
        {"Notes": """
    ## Dataset Column Notes

    > Only highlighted columns of interest 

    * Diabetes_binary: [ 0 (No diabetes) | 1 (Pre/diabetes) ]
    * HighBP: [ 0 (No High BP) | 1 (High BP) ]
    * HighChol: [ 0 (No High Chol) | 1 (High Chol) ]
    * Stroke: [ 0 (Never) | 1 (Had) ]
    * HeartDiseaseorAttack: [ 0 (No) | 1 (Yes) ]
    * Smoker: [ 0 (<100 cigs lifetime) | 1 (≥100 cigs lifetime) ]
    * HvyAlcohol: [ 0 (<14 🍺/week for men, <7 🍺/week for women) | 1 (otherwise) ]
        """}
    )
    return


@app.cell(hide_code=True)
def _(pl):
    # Read the BRFSS 2015 CSV and keep only the target plus the four
    # prior-condition flags; the head of that subset is the cell's output.
    _csv_path = "dataset/diabetes_binary_health_indicators_BRFSS2015.csv"
    _columns_of_interest = [
        "Diabetes_binary",
        "HighBP",
        "HighChol",
        "Stroke",
        "HeartDiseaseorAttack",
    ]

    dataset_raw = pl.read_csv(_csv_path)
    dataset_prior_conditions = dataset_raw.select(_columns_of_interest)
    dataset_prior_conditions.head()
    return dataset_prior_conditions, dataset_raw


@app.cell
def _(mo):
    # Section heading for the classifier comparison that follows.
    mo.md("## Testing Classifiers")
    return


@app.cell
def _(dataset_prior_conditions, mo, pl):
    # Fit a Bernoulli Naive Bayes model and a decision tree on the
    # prior-condition flags, then show accuracy, confusion matrix and
    # classification report for each one in an accordion.
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    # Features: every selected column except the target.
    X_priors_NB = dataset_prior_conditions.select(pl.exclude("Diabetes_binary"))
    # Target as a 1-D array. scikit-learn expects y of shape (n_samples,);
    # passing a single-column frame (2-D) raises a DataConversionWarning on fit.
    y_priors_NB = dataset_prior_conditions.get_column("Diabetes_binary").to_numpy()
    X_train_priors, X_test_priors, y_train_priors, y_test_priors = train_test_split(
        X_priors_NB, y_priors_NB, random_state=33, test_size=0.25
    )

    bnb = BernoulliNB()
    # Seeded like the split above so the tree, and the metrics reported for it,
    # are reproducible between runs.
    dtc = DecisionTreeClassifier(random_state=33)
    y_pred_priors = bnb.fit(X_train_priors, y_train_priors).predict(X_test_priors)
    y_pred_dtc = dtc.fit(X_train_priors, y_train_priors).predict(X_test_priors)
    mo.accordion(
        {
            "Bernoulli NB Metrics": f"""    
        Accuracy             : {accuracy_score(y_test_priors, y_pred_priors)}

        Confusion Matrix:

    ```
        {confusion_matrix(y_test_priors, y_pred_priors)}
    ```

        Classification Report: 

    ```
        {classification_report(y_test_priors, y_pred_priors)}
    ```
        """,
        "Decision Tree Classifier": f"""    
        Accuracy             : {accuracy_score(y_test_priors, y_pred_dtc)}

        Confusion Matrix:

    ```
        {confusion_matrix(y_test_priors, y_pred_dtc)}
    ```

        Classification Report: 

    ```
        {classification_report(y_test_priors, y_pred_dtc)}
    ```
        """})
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        X_priors_NB,
        X_test_priors,
        X_train_priors,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dtc,
        train_test_split,
        y_pred_dtc,
        y_pred_priors,
        y_priors_NB,
        y_test_priors,
        y_train_priors,
    )


@app.cell
def _(mo):
    # Written conclusion comparing the two classifiers evaluated above.
    mo.md(
        "Looks like Bernoulli Naive Bayes' performs better on this dataset, "
        "as even though the Decision Tree Classifier has a bit better accuracy, "
        "the other metrics do give a better score on the BNB overall."
    )
    return


@app.cell
def _(mo):
    # Heading for the interactive predictor section.
    mo.md("# Diabetes Predictor using BNB")
    return


@app.cell
def _(mo):
    # Questionnaire: a markdown template whose placeholders are filled with
    # one checkbox per prior condition, batched into a single UI element.
    _question_template = mo.md(
        '''
        Do you suffer from?

        * {high_bp} - High Blood Pressure
        * {high_chol} - High Cholesterol
        * {stroke} - Stroke
        * {heart_disease_or_attack} - Heart Disease or Attack 
        '''
    )
    priors_predict = _question_template.batch(
        high_bp=mo.ui.checkbox(),
        high_chol=mo.ui.checkbox(),
        stroke=mo.ui.checkbox(),
        heart_disease_or_attack=mo.ui.checkbox(),
    )
    priors_predict
    return (priors_predict,)


@app.cell
def _(bnb, mo, priors_predict):
    # Classify the questionnaire answers with the fitted Bernoulli NB model and
    # show the verdict as a coloured callout.

    # Read the answers by name and lay them out in the order the model's
    # feature columns were selected (HighBP, HighChol, Stroke,
    # HeartDiseaseorAttack), so the prediction does not depend on the order in
    # which the form's elements happen to be iterated.
    _answers = priors_predict.value
    _features = [
        _answers["high_bp"],
        _answers["high_chol"],
        _answers["stroke"],
        _answers["heart_disease_or_attack"],
    ]

    # predict() yields one label per input row; take the single row's label
    # explicitly instead of relying on the truthiness of a whole array.
    diabetes_or_not = bool(bnb.predict([_features])[0])
    if diabetes_or_not:
        prediction = mo.md("Diabetes").callout(kind="danger")
    else:
        prediction = mo.md("No Diabetes").callout(kind="success")
    prediction
    return diabetes_or_not, prediction


# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()