import marimo

__generated_with = "0.11.6"
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _(mo):
    # Notebook title.
    mo.md(r"""# Diabetes Dataset Analysis""")
    return


@app.cell(hide_code=True)
def _():
    # Shared imports, exposed to the other cells through the return tuple.
    import marimo as mo
    import polars as pl
    return mo, pl


@app.cell(hide_code=True)
def _(mo):
    # Collapsible legend for the binary columns referenced in this notebook.
    mo.accordion(
        {
            "Notes": """
    ## Dataset Column Notes

    > Only highlighted columns of interest

    * Diabetes_binary: [ 0 (No diabetes) | 1 (Pre/diabetes) ]
    * HighBP: [ 0 (No High BP) | 1 (High BP) ]
    * HighChol: [ 0 (No High Chol) | 1 (High Chol) ]
    * Stroke: [ 0 (Never) | 1 (Had) ]
    * HeartDiseaseorAttack: [ 0 (No) | 1 (Yes) ]
    * Smoker: [ 0 (<100 cigs lifetime) | 1 (>100 cigs lifetime) ]
    * HvyAlcohol: [ 0 (<14 🍺/week for men, <7 🍺/week for women) | 1 (otherwise) ]
    """
        }
    )
    return


@app.cell(hide_code=True)
def _(pl):
    # Load the raw CSV and keep only the target plus the four
    # prior-condition indicator columns used by the classifiers below.
    dataset_raw = pl.read_csv("dataset/diabetes_binary_health_indicators_BRFSS2015.csv")
    dataset_prior_conditions = dataset_raw.select(
        ["Diabetes_binary", "HighBP", "HighChol", "Stroke", "HeartDiseaseorAttack"]
    )
    dataset_prior_conditions.head()
    return dataset_prior_conditions, dataset_raw


@app.cell
def _(mo):
    mo.md("""## Testing Classifiers""")
    return


@app.cell
def _(dataset_prior_conditions, mo, pl):
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    # Features are every column except the target; the target stays a
    # single-column frame.
    # NOTE(review): scikit-learn generally expects a 1-D target vector and may
    # emit a DataConversionWarning for a column-shaped y - confirm and flatten
    # if the warning shows up.
    X_priors_NB = dataset_prior_conditions.select(pl.exclude("Diabetes_binary"))
    y_priors_NB = dataset_prior_conditions.select("Diabetes_binary")

    # Fixed seed so the 75/25 split (and the metrics below) are reproducible.
    X_train_priors, X_test_priors, y_train_priors, y_test_priors = train_test_split(
        X_priors_NB, y_priors_NB, random_state=33, test_size=0.25
    )

    bnb = BernoulliNB()
    dtc = DecisionTreeClassifier()

    y_pred_priors = bnb.fit(X_train_priors, y_train_priors).predict(X_test_priors)
    y_pred_dtc = dtc.fit(X_train_priors, y_train_priors).predict(X_test_priors)

    def _metrics_markdown(_y_true, _y_pred):
        # Build the markdown line by line so the multi-line matrix/report
        # text cannot disturb the indentation of the surrounding markdown.
        return "\n".join(
            [
                f"Accuracy : {accuracy_score(_y_true, _y_pred)}",
                "",
                "Confusion Matrix:",
                "```",
                f"{confusion_matrix(_y_true, _y_pred)}",
                "```",
                "",
                "Classification Report:",
                "```",
                f"{classification_report(_y_true, _y_pred)}",
                "```",
            ]
        )

    # One collapsible section per classifier, both scored on the same test split.
    mo.accordion(
        {
            "Bernoulli NB Metrics": _metrics_markdown(y_test_priors, y_pred_priors),
            "Decision Tree Classifier": _metrics_markdown(y_test_priors, y_pred_dtc),
        }
    )
    return (
        BernoulliNB,
        DecisionTreeClassifier,
        X_priors_NB,
        X_test_priors,
        X_train_priors,
        accuracy_score,
        bnb,
        classification_report,
        confusion_matrix,
        dtc,
        train_test_split,
        y_pred_dtc,
        y_pred_priors,
        y_priors_NB,
        y_test_priors,
        y_train_priors,
    )


@app.cell
def _(mo):
    mo.md(r"""Looks like Bernoulli Naive Bayes' performs better on this dataset, as even though the Decision Tree Classifier has a bit better accuracy, the other metrics do give a better score on the BNB overall.""")
    return


@app.cell
def _(mo):
    mo.md(r"""# Diabetes Predictor using BNB""")
    return


@app.cell
def _(mo):
    # Checkbox form. The keyword order below mirrors the feature column order
    # used for training (HighBP, HighChol, Stroke, HeartDiseaseorAttack).
    priors_predict = mo.md(
        '''
    Do you suffer from?

    * {high_bp} - High Blood Pressure
    * {high_chol} - High Cholesterol
    * {stroke} - Stroke
    * {heart_disease_or_attack} - Heart Disease or Attack
    '''
    ).batch(
        high_bp=mo.ui.checkbox(),
        high_chol=mo.ui.checkbox(),
        stroke=mo.ui.checkbox(),
        heart_disease_or_attack=mo.ui.checkbox(),
    )
    priors_predict
    return (priors_predict,)


@app.cell
def _(bnb, mo, priors_predict):
    # Single-row feature matrix built from the checkbox states; this relies on
    # the form's element order matching the training column order.
    _answers = [[_box.value for _box in priors_predict.values()]]

    # predict() returns one label per input row; take the only one explicitly
    # instead of relying on the truthiness of a one-element array.
    diabetes_or_not = bool(bnb.predict(_answers)[0])

    if diabetes_or_not:
        prediction = mo.md("Diabetes").callout(kind="danger")
    else:
        prediction = mo.md("No Diabetes").callout(kind="success")
    prediction
    return diabetes_or_not, prediction


if __name__ == "__main__":
    app.run()