|
import marimo |
|
|
|
# Version of marimo that generated this notebook file.
__generated_with = "0.11.6"

# Application object that the @app.cell-decorated functions below attach to.
app = marimo.App(width="medium")
|
|
|
|
|
@app.cell(hide_code=True)
def _(mo):
    # Notebook title, rendered as a top-level markdown heading.
    mo.md(r"""# Diabetes Dataset Analysis""")
    return
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    # Shared imports: `mo` and `pl` are returned so that cells declaring them
    # as parameters can use them.
    import marimo as mo
    import polars as pl

    return (mo, pl)
|
|
|
|
|
@app.cell(hide_code=True)
def _(mo):
    # Collapsible legend describing how the columns of interest are encoded.
    # The text is wrapped in mo.md explicitly so the indentation used here is
    # handled by the markdown renderer.
    mo.accordion(
        {
            "Notes": mo.md(
                """
                ## Dataset Column Notes

                > Only highlighted columns of interest

                * Diabetes_binary: [ 0 (No diabetes) | 1 (Pre/diabetes) ]
                * HighBP: [ 0 (No High BP) | 1 (High BP) ]
                * HighChol: [ 0 (No High Chol) | 1 (High Chol) ]
                * Stroke: [ 0 (Never) | 1 (Had) ]
                * HeartDiseaseorAttack: [ 0 (No) | 1 (Yes) ]
                * Smoker: [ 0 (<100 cigs lifetime) | 1 (>100 cigs lifetime) ]
                * HvyAlcohol: [ 0 (<14 🍺/week for men, <7 🍺/week for women) | 1 (otherwise) ]
                """
            )
        }
    )
    return
|
|
|
|
|
@app.cell(hide_code=True)
def _(pl):
    # Read the full CSV, then keep only the target column and the four
    # prior-condition columns that the later cells work with.
    _columns_of_interest = [
        "Diabetes_binary",
        "HighBP",
        "HighChol",
        "Stroke",
        "HeartDiseaseorAttack",
    ]
    dataset_raw = pl.read_csv(
        "dataset/diabetes_binary_health_indicators_BRFSS2015.csv"
    )
    dataset_prior_conditions = dataset_raw.select(_columns_of_interest)

    # Trailing expression: preview of the first rows of the reduced table.
    dataset_prior_conditions.head()
    return dataset_prior_conditions, dataset_raw
|
|
|
|
|
@app.cell
def _(mo):
    # Section heading for the classifier comparison that follows.
    mo.md("""## Testing Classifiers""")
    return
|
|
|
|
|
@app.cell
def _(dataset_prior_conditions, mo, pl):
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    def _metrics_markdown(y_true, y_pred):
        """Return the markdown summary (accuracy, confusion matrix, report).

        The lines are joined explicitly, without leading indentation, so the
        multi-line sklearn output sits flush-left inside its code fences.
        """
        fence = "```"
        return "\n".join(
            [
                f"Accuracy : {accuracy_score(y_true, y_pred)}",
                "",
                "Confusion Matrix:",
                "",
                fence,
                f"{confusion_matrix(y_true, y_pred)}",
                fence,
                "",
                "Classification Report:",
                "",
                fence,
                f"{classification_report(y_true, y_pred)}",
                fence,
            ]
        )

    # Features are every column except the target; the target stays a
    # one-column frame so the exported variables keep their original types.
    X_priors_NB = dataset_prior_conditions.select(pl.exclude("Diabetes_binary"))
    y_priors_NB = dataset_prior_conditions.select("Diabetes_binary")
    X_train_priors, X_test_priors, y_train_priors, y_test_priors = train_test_split(
        X_priors_NB, y_priors_NB, random_state=33, test_size=0.25
    )

    # sklearn expects y with shape (n_samples,); flattening the one-column
    # frame avoids the column-vector DataConversionWarning during fit().
    _y_train_1d = y_train_priors.to_numpy().ravel()

    bnb = BernoulliNB()
    # Seeded like the split above so the comparison is reproducible.
    dtc = DecisionTreeClassifier(random_state=33)
    y_pred_priors = bnb.fit(X_train_priors, _y_train_1d).predict(X_test_priors)
    y_pred_dtc = dtc.fit(X_train_priors, _y_train_1d).predict(X_test_priors)

    # Trailing expression: one collapsible metrics panel per classifier.
    mo.accordion(
        {
            "Bernoulli NB Metrics": _metrics_markdown(y_test_priors, y_pred_priors),
            "Decision Tree Classifier": _metrics_markdown(y_test_priors, y_pred_dtc),
        }
    )
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        X_priors_NB,
        X_test_priors,
        X_train_priors,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dtc,
        train_test_split,
        y_pred_dtc,
        y_pred_priors,
        y_priors_NB,
        y_test_priors,
        y_train_priors,
    )
|
|
|
|
|
@app.cell
def _(mo):
    # Written conclusion of the classifier comparison.
    mo.md(r"""Looks like Bernoulli Naive Bayes performs better on this dataset: even though the Decision Tree Classifier has slightly better accuracy, the other metrics give the BNB a better score overall.""")
    return
|
|
|
|
|
@app.cell
def _(mo):
    # Section heading for the interactive predictor.
    mo.md(r"""# Diabetes Predictor using BNB""")
    return
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown template; each {placeholder} is filled by the checkbox passed
    # to .batch() under the same name.
    _question = mo.md(
        """
        Do you suffer from?

        * {high_bp} - High Blood Pressure
        * {high_chol} - High Cholesterol
        * {stroke} - Stroke
        * {heart_disease_or_attack} - Heart Disease or Attack
        """
    )
    priors_predict = _question.batch(
        high_bp=mo.ui.checkbox(),
        high_chol=mo.ui.checkbox(),
        stroke=mo.ui.checkbox(),
        heart_disease_or_attack=mo.ui.checkbox(),
    )

    # Trailing expression: display the form.
    priors_predict
    return (priors_predict,)
|
|
|
|
|
@app.cell
def _(bnb, mo, priors_predict):
    # The feature vector must follow the training column order
    # (HighBP, HighChol, Stroke, HeartDiseaseorAttack), so the form values are
    # read by name instead of relying on the form's iteration order.
    _feature_fields = ("high_bp", "high_chol", "stroke", "heart_disease_or_attack")
    _answers = priors_predict.value
    _features = [[int(_answers[_name]) for _name in _feature_fields]]

    # predict() yields one label per input row; a single row is passed, so
    # take that row's label before converting it to a bool.
    diabetes_or_not = bool(bnb.predict(_features)[0])

    prediction = (
        mo.md("Diabetes").callout(kind="danger")
        if diabetes_or_not
        else mo.md("No Diabetes").callout(kind="success")
    )

    # Trailing expression: display the verdict callout.
    prediction
    return diabetes_or_not, prediction
|
|
|
|
|
# Run the notebook app when this file is executed as a script.
if __name__ == "__main__":
    app.run()
|
|