'''
Gradio app with two tabs: a project information page and an interactive
form that sends bio-signal measurements to the smoker classifier.
'''
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd

from model import SmokerModel
# Shared classifier wrapper, constructed once when this module is imported and
# reused by every call to predict().
# NOTE(review): the two arguments look like the serialized soft-voting ensemble
# and its min-max scaler — confirm against SmokerModel in model.py.
MODEL = SmokerModel("ensemble_softvoting_model.joblib","min_max_scaler.joblib")
def predict(
    age, height, weight,
    waist, eye_L, eye_R,
    hear_L, hear_R, systolic,
    relaxation, fasting_blood_sugar, cholesterol,
    triglyceride, HDL, LDL,
    hemoglobin, urine_protein,
    serum_creatinine, AST, ALT,
    Gtp, dental_caries
):
    '''
    Run the classifier on one set of measurements.

    The 22 arguments are placed, in order, into a single-row DataFrame whose
    column names are listed in ``column_names`` below. That frame is passed to
    ``MODEL.predict`` and its result is returned unchanged.
    '''
    # Column names used for the DataFrame handed to the model; the order here
    # must stay in step with ``values`` below.
    column_names = (
        "age", "height(cm)", "weight(kg)",
        "waist(cm)", "eyesight(left)", "eyesight(right)",
        "hearing(left)", "hearing(right)", "systolic",
        "relaxation", "fasting blood sugar", "Cholesterol",
        "triglyceride", "HDL", "LDL",
        "hemoglobin", "Urine protein",
        "serum creatinine", "AST", "ALT",
        "Gtp", "dental caries",
    )
    values = (
        age, height, weight,
        waist, eye_L, eye_R,
        hear_L, hear_R, systolic,
        relaxation, fasting_blood_sugar, cholesterol,
        triglyceride, HDL, LDL,
        hemoglobin, urine_protein,
        serum_creatinine, AST, ALT,
        Gtp, dental_caries,
    )

    # index=[0] is required because every dict value is a scalar.
    single_row = pd.DataFrame(dict(zip(column_names, values)), index=[0])
    return MODEL.predict(single_row)
def _build_info_page():
    '''
    Build the "Information" tab: a Blocks page containing the project write-up.
    '''
    info_page = gr.Blocks()
    with info_page:
        # The Markdown body is kept flush-left so that no line can be read as
        # an indented code block, whether or not the text is dedented later.
        gr.Markdown(
            """
# Ensemble Classifier for Predicting Smoker or Non-Smoker
**Contributors**: Matt Soria, Jake Leniart, Francisco Lozano\n
**University**: Depaul University\n
**Class**: DSC 478, Programming Machine Learning\n
## Overview
Our project focused on creating a classifier for a Kaggle dataset containing bio-signals and information on individuals' smoking status. The classifier aims to identify whether a patient is a smoker based on 22 provided features. You can find the dataset [here](https://www.kaggle.com/datasets/gauravduttakiit/smoker-status-prediction-using-biosignals?resource=download&select=train_dataset.csv).
We developed an Ensemble Classifier with Soft Voting, which combines KNN, SVM, and XGBoost classifiers.
## Labels
- **non-smoker** = 0
- **smoker** = 1
## Classifier Metrics
### Classification Report
```
Train Accuracy: 0.7833977837414656
Test Accuracy: 0.7885084006669232
precision recall f1-score support
non-smoker 0.83 0.84 0.83 4933
smoker 0.72 0.69 0.71 2864
accuracy 0.79 7797
macro avg 0.77 0.77 0.77 7797
weighted avg 0.79 0.79 0.79 7797
```
## Confusion Matrix

## Final Report
For more details about our Ensemble Classifier and the individual models, please refer to our Jupyter notebooks in our project repository.\n
[DSC 478 Project Repo](https://github.com/msoria17/dsc478-project)
"""
        )
    return info_page


def _build_model_inputs():
    '''
    Create the 22 input components, returned in the positional order of
    predict()'s parameters.
    '''
    age = gr.Number(label="Age", precision=0, minimum=0)
    height = gr.Number(label="Height(cm)", precision=0, minimum=0)
    weight = gr.Number(label="Weight(kg)", precision=0, minimum=0)
    waist = gr.Number(label="Waist(cm)", minimum=0, info="Waist circumference length")
    eye_L = gr.Number(label="Visual acuity of the left eye, measured in diopters (D)", minimum=0)
    eye_R = gr.Number(label="Visual acuity of the right eye, measured in diopters (D)", minimum=0)
    hear_L = gr.Radio(label="Is there any hearing ability in the left ear?",choices=[("Yes",1),("No",2)])
    hear_R = gr.Radio(label="Is there any hearing ability in the right ear?",choices=[("Yes",1),("No",2)])
    systolic = gr.Number(label="Systolic(mmHg)", precision=0, minimum=0, info="Blood Pressure")
    relaxation = gr.Number(label="Relaxation(mmHg)", precision=0, minimum=0, info="Blood Pressure")
    fasting_blood_sugar = gr.Number(label="Fasting Blood Sugar(mg/dL)", precision=0, minimum=0, info="the concentration of glucose (sugar) in the bloodstream after an extended period of fasting")
    cholesterol = gr.Number(label="Total Cholesterol(mg/dL)", precision=0, minimum=0, info="Total amount of cholesterol present in the blood")
    triglyceride = gr.Number(label="Triglyceride(mg/dL)", precision=0, minimum=0, info="A type of fat (lipid) found in blood")
    HDL = gr.Number(label="High-Density Lipoprotein(mg/dL) ", precision=0, minimum=0, info="It is commonly referred to as 'good cholesterol'")
    LDL = gr.Number(label="Low-Density Lipoprotein(mg/dL) ", precision=0, minimum=0, info="It is commonly referred to as 'bad cholesterol'")
    hemoglobin = gr.Number(label="Hemoglobin(g/dL)", minimum=0, info="a protein found in red blood cells that is responsible for carrying oxygen from the lungs to the tissues and organs of the body")
    urine_protein = gr.Radio(label="Does urine contain excessive traces of protein?",choices=[("Yes",2),("No",1)], info="when excessive protein is detected in the urine, it may indicate a problem with kidney function or other underlying health conditions.")
    serum_creatinine = gr.Number(label="Serum creatinine(mg/dL)", minimum=0, info="Serum creatinine levels are commonly measured through a blood test and are used to assess kidney function")
    AST = gr.Number(label="Aspartate Aminotransferase(IU/L)", precision=0, minimum=0, info="glutamic oxaloacetic transaminase type; AST is released into the bloodstream when cells are damaged or destroyed, such as during injury or disease affecting organs rich in AST.")
    ALT = gr.Number(label="Alanine Aminotransferase(IU/L)", precision=0, minimum=0, info="glutamic oxaloacetic transaminase type; ALT is primarily found in the liver cells, and increased levels of ALT in the blood can indicate liver damage or disease")
    Gtp = gr.Number(label="Gamma-glutamyl Transferase(IU/L)", precision=0, minimum=0, info="Elevated levels of GGT in the blood can indicate liver disease or bile duct obstruction. GGT levels are often measured alongside other liver function tests to assess liver health and function.")
    dental_caries = gr.Radio(label="Are there any signs of dental cavities?",choices=[("Yes",1),("No",0)])
    return [
        age, height, weight,
        waist, eye_L, eye_R,
        hear_L, hear_R, systolic,
        relaxation, fasting_blood_sugar, cholesterol,
        triglyceride, HDL, LDL,
        hemoglobin, urine_protein,
        serum_creatinine, AST, ALT,
        Gtp, dental_caries,
    ]


def load_interface():
    '''
    Configure Gradio interface

    Builds the "Information" page and the "Smoker Model" page, combines them
    into one tabbed interface, and launches it on 0.0.0.0:7860. Returns
    nothing; the result of launch() is not used.
    '''
    info_page = _build_info_page()

    inputs = _build_model_inputs()
    smoker_label = gr.Label(label="Predicted Label")
    model_page = gr.Interface(
        predict,
        inputs=inputs,
        outputs=smoker_label,
        # Example rows are supplied by Gradio from this path rather than
        # from an inline list.
        examples="file/examples",
        title="Interact with the Ensemble Classifier Model",
        description="**Medical Disclaimer**: The predictions provided by this model are for educational purposes only and should not be considered a substitute for professional medical advice."
    )

    iface = gr.TabbedInterface(
        [info_page, model_page],
        ["Information", "Smoker Model"]
    )

    # Only files under this script's directory may be served. The app listens
    # on every network interface, so allowing "/" would let any client
    # download any file the process can read.
    app_dir = str(Path(__file__).resolve().parent)
    iface.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=[app_dir])
if __name__ == "__main__":
    # Build and launch the UI only when executed as a script, not on import.
    load_interface()