# computerscience-person's picture
# Add comparison to Decision Tree Classifier.
# cdffe0b
import marimo
# NOTE(review): presumably the marimo version that generated this file — confirm.
__generated_with = "0.11.6"
# Application object; every cell below registers itself through `@app.cell`.
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _(mo):
    # Notebook title. The underscore-prefixed name stays private to this cell;
    # the trailing bare expression is the value the cell displays.
    _title = mo.md(r"""# Diabetes Dataset Analysis""")
    _title
    return
@app.cell(hide_code=True)
def _():
    # Shared imports: returning the modules makes `mo` and `pl` available to
    # the cells that list them as parameters.
    import marimo as mo
    import polars as pl

    return (mo, pl)
@app.cell(hide_code=True)
def _(mo):
    # Collapsible legend describing how the columns of interest are encoded.
    # Fixes over the previous text: the HighChol entry no longer repeats the
    # blood-pressure wording, and the Smoker entry has its closing bracket.
    mo.accordion(
        {"Notes": """
    ## Dataset Column Notes
    > Only highlighted columns of interest
    * Diabetes_binary: [ 0 (No diabetes) | 1 (Pre/diabetes) ]
    * HighBP: [ 0 (No High BP) | 1 (High BP) ]
    * HighChol: [ 0 (No High Cholesterol) | 1 (High Cholesterol) ]
    * Stroke: [ 0 (Never) | 1 (Had) ]
    * HeartDiseaseorAttack: [ 0 (No) | 1 (Yes) ]
    * Smoker: [ 0 (<100 cigs lifetime) | 1 (>100 cigs lifetime) ]
    * HvyAlcohol: [ 0 (<14 🍺/week for men, <7 🍺/week for women) | 1 (otherwise) ]
    """}
    )
    return
@app.cell(hide_code=True)
def _(pl):
    # Load the full CSV, then keep only the diabetes label and the four
    # prior-condition columns.
    _csv_path = "dataset/diabetes_binary_health_indicators_BRFSS2015.csv"
    _columns_of_interest = [
        "Diabetes_binary",
        "HighBP",
        "HighChol",
        "Stroke",
        "HeartDiseaseorAttack",
    ]

    dataset_raw = pl.read_csv(_csv_path)
    dataset_prior_conditions = dataset_raw.select(_columns_of_interest)

    # Preview of the reduced frame (the cell's displayed value).
    dataset_prior_conditions.head()
    return dataset_prior_conditions, dataset_raw
@app.cell
def _(mo):
    # Section heading for the classifier comparison.
    _heading = mo.md("""## Testing Classifiers""")
    _heading
    return
@app.cell
def _(dataset_prior_conditions, mo, pl):
    # Train a Bernoulli Naive Bayes model and a decision tree on the same
    # train/test split of the prior-condition flags, then show accuracy,
    # confusion matrix and classification report for each side by side.
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    # Features: every selected column except the label. Target: the label column.
    X_priors_NB = dataset_prior_conditions.select(pl.exclude("Diabetes_binary"))
    y_priors_NB = dataset_prior_conditions.select("Diabetes_binary")

    X_train_priors, X_test_priors, y_train_priors, y_test_priors = train_test_split(
        X_priors_NB, y_priors_NB, random_state=33, test_size=0.25
    )

    # The target is a one-column frame; flatten it to 1-D before fitting so
    # scikit-learn does not emit its column-vector DataConversionWarning.
    # Cell-private (underscore prefix), so the cell's exported names are unchanged.
    _y_train_1d = y_train_priors.to_numpy().ravel()

    bnb = BernoulliNB()
    # Seed the tree (same seed as the split) so reruns give the same metrics.
    dtc = DecisionTreeClassifier(random_state=33)

    y_pred_priors = bnb.fit(X_train_priors, _y_train_1d).predict(X_test_priors)
    y_pred_dtc = dtc.fit(X_train_priors, _y_train_1d).predict(X_test_priors)

    mo.accordion(
        {
            "Bernoulli NB Metrics": f"""
    Accuracy : {accuracy_score(y_test_priors, y_pred_priors)}
    Confusion Matrix:
    ```
    {confusion_matrix(y_test_priors, y_pred_priors)}
    ```
    Classification Report:
    ```
    {classification_report(y_test_priors, y_pred_priors)}
    ```
    """,
            "Decision Tree Classifier": f"""
    Accuracy : {accuracy_score(y_test_priors, y_pred_dtc)}
    Confusion Matrix:
    ```
    {confusion_matrix(y_test_priors, y_pred_dtc)}
    ```
    Classification Report:
    ```
    {classification_report(y_test_priors, y_pred_dtc)}
    ```
    """})
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        X_priors_NB,
        X_test_priors,
        X_train_priors,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dtc,
        train_test_split,
        y_pred_dtc,
        y_pred_priors,
        y_priors_NB,
        y_test_priors,
        y_train_priors,
    )
@app.cell
def _(mo):
    # Written takeaway from the classifier comparison.
    _takeaway = mo.md(r"""Looks like Bernoulli Naive Bayes' performs better on this dataset, as even though the Decision Tree Classifier has a bit better accuracy, the other metrics do give a better score on the BNB overall.""")
    _takeaway
    return
@app.cell
def _(mo):
    # Heading for the interactive predictor section.
    _heading = mo.md(r"""# Diabetes Predictor using BNB""")
    _heading
    return
@app.cell
def _(mo):
    # Checkbox form for the four prior conditions. The keyword order mirrors
    # the feature column order (HighBP, HighChol, Stroke, HeartDiseaseorAttack)
    # because the prediction cell reads the form values positionally.
    _form_template = mo.md(
        '''
    Do you suffer from?
    * {high_bp} - High Blood Pressure
    * {high_chol} - High Cholesterol
    * {stroke} - Stroke
    * {heart_disease_or_attack} - Heart Disease or Attack
    '''
    )
    priors_predict = _form_template.batch(
        high_bp=mo.ui.checkbox(),
        high_chol=mo.ui.checkbox(),
        stroke=mo.ui.checkbox(),
        heart_disease_or_attack=mo.ui.checkbox(),
    )
    priors_predict
    return (priors_predict,)
@app.cell
def _(bnb, mo, priors_predict):
    # Turn the current checkbox states into a single feature row and ask the
    # fitted Bernoulli NB model for its prediction.
    _feature_row = [field.value for field in priors_predict.values()]
    diabetes_or_not = bool(bnb.predict([_feature_row]))

    # Pick the callout text and colour from the predicted class.
    if diabetes_or_not:
        _label, _kind = "Diabetes", "danger"
    else:
        _label, _kind = "No Diabetes", "success"

    prediction = mo.md(_label).callout(kind=_kind)
    prediction
    return diabetes_or_not, prediction
if __name__ == "__main__":
    # Executed directly rather than imported: run the app.
    app.run()