File size: 4,403 Bytes
9d94c83 27db1e5 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 cdffe0b 9d94c83 27db1e5 cdffe0b 27db1e5 9d94c83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import marimo
__generated_with = "0.11.6"
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _(mo):
    # Notebook title, rendered as a top-level markdown heading.
    _title = r"""# Diabetes Dataset Analysis"""
    mo.md(_title)
    return
@app.cell(hide_code=True)
def _():
    # Shared third-party modules. The returned names are what the other
    # cells receive through their `mo` / `pl` parameters.
    import marimo as mo
    import polars as pl

    return (mo, pl)
@app.cell(hide_code=True)
def _(mo):
    # Collapsible legend describing how the binary columns referenced in
    # this notebook are encoded.
    #
    # Blank lines separate the heading, the blockquote and the bullet list
    # so each is parsed as its own Markdown block instead of being folded
    # into the preceding one.
    _column_notes = """
## Dataset Column Notes

> Only highlighted columns of interest

* Diabetes_binary: [ 0 (No diabetes) | 1 (Pre/diabetes) ]
* HighBP: [ 0 (No High BP) | 1 (High BP) ]
* HighChol: [ 0 (No High Chol) | 1 (High Chol) ]
* Stroke: [ 0 (Never) | 1 (Had) ]
* HeartDiseaseorAttack: [ 0 (No) | 1 (Yes) ]
* Smoker: [ 0 (<100 cigs lifetime) | 1 (>100 cigs lifetime) ]
* HvyAlcohol: [ 0 (<14 🍺/week for men, <7 🍺/week for women) | 1 (otherwise) ]
"""
    mo.accordion({"Notes": _column_notes})
    return
@app.cell(hide_code=True)
def _(pl):
    # Read the full CSV, then keep only the target column plus the four
    # prior-condition indicator columns used by the classifiers.
    _csv_path = "dataset/diabetes_binary_health_indicators_BRFSS2015.csv"
    _prior_condition_columns = [
        "Diabetes_binary",
        "HighBP",
        "HighChol",
        "Stroke",
        "HeartDiseaseorAttack",
    ]

    dataset_raw = pl.read_csv(_csv_path)
    dataset_prior_conditions = dataset_raw.select(_prior_condition_columns)

    # Last expression of the cell: preview of the first rows.
    dataset_prior_conditions.head()
    return dataset_prior_conditions, dataset_raw
@app.cell
def _(mo):
    # Section heading for the classifier comparison.
    _heading = """## Testing Classifiers"""
    mo.md(_heading)
    return
@app.cell
def _(dataset_prior_conditions, mo, pl):
    # Train a Bernoulli Naive Bayes model and a decision tree on the
    # prior-condition indicators and show their test-set metrics side by
    # side in an accordion.
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    # Features: every selected column except the target.
    X_priors_NB = dataset_prior_conditions.select(pl.exclude("Diabetes_binary"))
    # Target is kept as a single-column frame so the returned names keep
    # their original types for any cell that consumes them.
    y_priors_NB = dataset_prior_conditions.select("Diabetes_binary")

    X_train_priors, X_test_priors, y_train_priors, y_test_priors = train_test_split(
        X_priors_NB, y_priors_NB, random_state=33, test_size=0.25
    )

    # Estimators expect a 1-D target; handing them the (n, 1) frame makes
    # scikit-learn emit a DataConversionWarning, so flatten it for fitting.
    _y_train_1d = y_train_priors.to_numpy().ravel()

    bnb = BernoulliNB()
    # Seeded with the same value as the split so reruns are reproducible.
    dtc = DecisionTreeClassifier(random_state=33)

    y_pred_priors = bnb.fit(X_train_priors, _y_train_1d).predict(X_test_priors)
    y_pred_dtc = dtc.fit(X_train_priors, _y_train_1d).predict(X_test_priors)

    def _metrics_markdown(y_true, y_pred):
        """Return the markdown report (accuracy, confusion matrix,
        classification report) for one set of predictions."""
        return f"""
Accuracy : {accuracy_score(y_true, y_pred)}
Confusion Matrix:
```
{confusion_matrix(y_true, y_pred)}
```
Classification Report:
```
{classification_report(y_true, y_pred)}
```
"""

    mo.accordion(
        {
            "Bernoulli NB Metrics": _metrics_markdown(y_test_priors, y_pred_priors),
            "Decision Tree Classifier": _metrics_markdown(y_test_priors, y_pred_dtc),
        }
    )
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        X_priors_NB,
        X_test_priors,
        X_train_priors,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dtc,
        train_test_split,
        y_pred_dtc,
        y_pred_priors,
        y_priors_NB,
        y_test_priors,
        y_train_priors,
    )
@app.cell
def _(mo):
    # Written conclusion of the classifier comparison.
    _conclusion = r"""Looks like Bernoulli Naive Bayes' performs better on this dataset, as even though the Decision Tree Classifier has a bit better accuracy, the other metrics do give a better score on the BNB overall."""
    mo.md(_conclusion)
    return
@app.cell
def _(mo):
    # Top-level heading for the interactive predictor section.
    _heading = r"""# Diabetes Predictor using BNB"""
    mo.md(_heading)
    return
@app.cell
def _(mo):
    # Markdown form with one checkbox per prior condition. Each `{name}`
    # placeholder is filled by the element passed to `.batch` under that
    # keyword.
    _form_template = mo.md(
        '''
Do you suffer from?
* {high_bp} - High Blood Pressure
* {high_chol} - High Cholesterol
* {stroke} - Stroke
* {heart_disease_or_attack} - Heart Disease or Attack
'''
    )

    # Insertion order is kept deliberately: the prediction cell reads the
    # elements positionally, in the order HighBP, HighChol, Stroke,
    # HeartDiseaseorAttack.
    _checkboxes = {
        "high_bp": mo.ui.checkbox(),
        "high_chol": mo.ui.checkbox(),
        "stroke": mo.ui.checkbox(),
        "heart_disease_or_attack": mo.ui.checkbox(),
    }

    priors_predict = _form_template.batch(**_checkboxes)
    priors_predict
    return (priors_predict,)
@app.cell
def _(bnb, mo, priors_predict):
    # Build a single-row feature matrix from the checkbox states (in form
    # order) and classify it with the fitted Bernoulli NB model.
    _answers = [element.value for element in priors_predict.values()]
    diabetes_or_not = bool(bnb.predict([_answers]))

    # Red callout for a positive prediction, green otherwise.
    prediction = (
        mo.md("Diabetes").callout(kind="danger")
        if diabetes_or_not
        else mo.md("No Diabetes").callout(kind="success")
    )
    prediction
    return diabetes_or_not, prediction
# Run the notebook as an app when this file is executed as a script.
if __name__ == "__main__":
    app.run()
|