bsiddhharth committed on
Commit
7e94977
·
0 Parent(s):

Added app.py, pickle files, diabetes_pred.ipynb, requirements

Browse files
Files changed (6) hide show
  1. .gitignore +12 -0
  2. app.py +215 -0
  3. diabetes_pred.ipynb +0 -0
  4. randomforest_model.pkl +0 -0
  5. requirements.txt +6 -0
  6. xgboost_model.pkl +0 -0
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore virtual environment
2
+ venv3/
3
+
4
+ # Ignore environment files
5
+ .env
6
+
7
+ # Ignore Python compiled files
8
+ *.pyc
9
+ __pycache__/
10
+
11
+
12
+ diabetes_prediction.ipynb
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
# Page configuration: title and icon shown for the page, plus a centered layout.
st.set_page_config(page_title="Diabetes Prediction App", page_icon="🏥", layout="centered")
12
+
13
+ # loading the model
14
@st.cache_resource
def _load_pickled_model(model_path):
    """Unpickle and return the model stored at *model_path*.

    Only a successful load is cached by ``st.cache_resource``: any exception
    propagates to the caller, so a failed attempt is retried on the next call
    instead of being remembered as a permanent ``None``.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling. Only ever point this at a model file that you trust.
    with open(model_path, 'rb') as file:
        return pickle.load(file)


def load_model():
    """Return the trained model from 'xgboost_model.pkl', or None on failure.

    The path is relative to the current working directory. On any loading
    error the message is shown in the Streamlit UI via ``st.error`` and
    ``None`` is returned, so callers must check the result before using it.
    """
    try:
        return _load_pickled_model('xgboost_model.pkl')
    except Exception as e:
        # Broad on purpose: unpickling can fail with many exception types
        # (missing file, missing library, corrupted data) and this is the UI
        # boundary where all of them should become a visible error message.
        st.error(f"Error loading model: {str(e)}")
        return None


# Keep the cache-clearing hook reachable under the public name, as it was
# when load_model itself carried the st.cache_resource decorator.
load_model.clear = _load_pickled_model.clear
23
+
24
# Feature columns in the exact order the model input frame must have.
_EXPECTED_COLUMNS = [
    'gender_Female', 'gender_Male', 'gender_Other',
    'smoking_history_No Info', 'smoking_history_current',
    'smoking_history_ever', 'smoking_history_former',
    'smoking_history_never', 'smoking_history_not current', 'age',
    'hypertension', 'heart_disease', 'bmi', 'HbA1c_level',
    'blood_glucose_level',
]

# Integer codes accepted by preprocess_input -> suffix of the one-hot column.
_GENDER_LABELS = {0: 'Female', 1: 'Male', 2: 'Other'}
_SMOKING_LABELS = {
    0: 'never',
    1: 'former',
    2: 'current',
    3: 'not current',
    4: 'ever',
    5: 'No Info',
}


def preprocess_input(gender, age, hypertension, heart_disease, smoking_history, bmi, hba1c_level, blood_glucose_level):
    """Build the single-row feature frame for one patient.

    Args:
        gender: 0 (Female), 1 (Male) or 2 (Other).
        age: Patient age, stored unchanged in the 'age' column.
        hypertension: Stored unchanged in the 'hypertension' column.
        heart_disease: Stored unchanged in the 'heart_disease' column.
        smoking_history: 0 never, 1 former, 2 current, 3 not current,
            4 ever, 5 No Info.
        bmi: Stored unchanged in the 'bmi' column.
        hba1c_level: Stored unchanged in the 'HbA1c_level' column.
        blood_glucose_level: Stored unchanged in the 'blood_glucose_level'
            column.

    Returns:
        A one-row pandas DataFrame with the one-hot gender and smoking
        columns followed by the numeric features, in the fixed column order
        of ``_EXPECTED_COLUMNS``.

    Raises:
        KeyError: If ``gender`` or ``smoking_history`` is not a known code.
    """
    try:
        gender_column = f'gender_{_GENDER_LABELS[gender]}'
    except KeyError:
        raise KeyError(
            f"Unknown gender code {gender!r}; expected one of {sorted(_GENDER_LABELS)}"
        ) from None

    try:
        smoking_column = f'smoking_history_{_SMOKING_LABELS[smoking_history]}'
    except KeyError:
        raise KeyError(
            f"Unknown smoking_history code {smoking_history!r}; "
            f"expected one of {sorted(_SMOKING_LABELS)}"
        ) from None

    # Start with every feature at 0, already in the expected order, then fill
    # in the numeric values and switch on the two selected one-hot columns.
    row = {column: [0] for column in _EXPECTED_COLUMNS}
    row['age'] = [age]
    row['hypertension'] = [hypertension]
    row['heart_disease'] = [heart_disease]
    row['bmi'] = [bmi]
    row['HbA1c_level'] = [hba1c_level]
    row['blood_glucose_level'] = [blood_glucose_level]
    row[gender_column] = [1]
    row[smoking_column] = [1]

    # Passing columns= pins the order explicitly rather than relying on the
    # dict's insertion order alone.
    return pd.DataFrame(row, columns=_EXPECTED_COLUMNS)
77
+
78
def main():
    """Render the diabetes prediction page.

    Shows a form collecting the patient's parameters. After the form is
    submitted, and only if a model could be loaded, the inputs are run
    through ``preprocess_input``, the model's prediction and positive-class
    probability are displayed, and a summary of the inputs is shown. Any
    error raised while predicting is reported in the UI rather than raised.
    """
    # Single source of truth for the display labels of the coded options, so
    # the form widgets and the input summary cannot drift apart.
    gender_labels = {0: "Female", 1: "Male"}
    yes_no_labels = {0: "No", 1: "Yes"}
    smoking_labels = {
        0: "Never",
        1: "Former",
        2: "Current",
        3: "Not Current",
        4: "Ever",
        5: "No Info",
    }

    st.title("Diabetes Prediction System 🏥")
    st.markdown("""
    This app predicts the likelihood of diabetes based on various health parameters.
    Please fill in the information below to get a prediction.
    """)

    with st.form("prediction_form"):
        st.subheader("Patient Information")

        col1, col2 = st.columns(2)

        with col1:
            gender = st.selectbox(
                "Gender",
                options=list(gender_labels),
                format_func=gender_labels.get,
            )

            age = st.number_input(
                "Age",
                min_value=0,
                max_value=120,
                value=40,
            )

            hypertension = st.selectbox(
                "Hypertension",
                options=list(yes_no_labels),
                format_func=yes_no_labels.get,
            )

            heart_disease = st.selectbox(
                "Heart Disease",
                options=list(yes_no_labels),
                format_func=yes_no_labels.get,
            )

        with col2:
            smoking_history = st.selectbox(
                "Smoking History",
                options=list(smoking_labels),
                format_func=smoking_labels.get,
            )

            bmi = st.number_input(
                "BMI",
                min_value=10.0,
                max_value=100.0,
                value=25.0,
                step=0.1,
            )

            hba1c_level = st.number_input(
                "HbA1c Level",
                min_value=3.0,
                max_value=15.0,
                value=5.5,
                step=0.1,
            )

            blood_glucose_level = st.number_input(
                "Blood Glucose Level",
                min_value=50,
                max_value=500,
                value=120,
            )

        submit_button = st.form_submit_button("Predict")

    model = load_model()

    # load_model reports its own failure in the UI; without a model there is
    # nothing to predict.
    if submit_button and model is not None:
        try:
            # Preprocess the input data
            input_df = preprocess_input(
                gender, age, hypertension, heart_disease,
                smoking_history, bmi, hba1c_level, blood_glucose_level
            )

            # Debug information
            with st.expander("Show preprocessed features"):
                st.write(input_df)

            # Make prediction; column 1 of predict_proba is used as the
            # probability of the positive (diabetes) class.
            prediction = model.predict(input_df)
            probability = model.predict_proba(input_df)[0][1]

            # Display results
            st.subheader("Prediction Results")

            col1, col2 = st.columns(2)

            with col1:
                if prediction[0] == 1:
                    st.error("High Risk of Diabetes")
                else:
                    st.success("Low Risk of Diabetes")

            with col2:
                st.metric(
                    label="Risk Probability",
                    value=f"{probability:.1%}"
                )

            # Display input summary
            st.subheader("Input Summary")
            summary_df = pd.DataFrame({
                'Feature': [
                    'Gender', 'Age', 'Hypertension', 'Heart Disease',
                    'Smoking History', 'BMI', 'HbA1c Level', 'Blood Glucose Level'
                ],
                'Value': [
                    gender_labels[gender],
                    f"{age} years",
                    yes_no_labels[hypertension],
                    yes_no_labels[heart_disease],
                    smoking_labels[smoking_history],
                    f"{bmi:.1f}",
                    f"{hba1c_level:.1f}",
                    f"{blood_glucose_level:.0f}"
                ]
            })
            st.dataframe(summary_df, hide_index=True)

        except Exception as e:
            # UI boundary: surface any preprocessing/model failure to the
            # user instead of crashing the page.
            st.error(f"Error making prediction: {str(e)}")
            st.error("Please check the model compatibility with the input features.")
213
+
214
# Render the app only when this file is run as the entry script.
if __name__ == "__main__":
    main()
diabetes_pred.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
randomforest_model.pkl ADDED
Binary file (210 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ streamlit
3
+ pandas
4
+ numpy
5
+ xgboost
6
+ scikit-learn
xgboost_model.pkl ADDED
Binary file (210 kB). View file