hydraadra112 committed on
Commit
583b3ab
·
1 Parent(s): c2f6ae2

Created app

Browse files
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from joblib import load
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+
7
@st.cache_data
def load_data() -> pd.DataFrame:
    """
    Loads the `.csv` data using pandas and imputes missing values.

    Numerical columns are filled with their column mean and categorical
    columns with their most frequent value (mode).

    The result is cached with `st.cache_data` (rather than
    `st.cache_resource`) so every caller receives its own copy of the
    DataFrame instead of sharing one mutable object across sessions.

    Returns:
        The imputed DataFrame read from `./lung_disease_data.csv`.
    """
    df = pd.read_csv('./lung_disease_data.csv')

    # Impute numerical features with their mean
    numerical_columns = ['Age', 'Lung Capacity', 'Hospital Visits']
    df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].mean())

    # Impute categorical features with their mode (most frequent value)
    categorical_columns = ['Gender', 'Smoking Status', 'Disease Type', 'Treatment Type', 'Recovered']
    df[categorical_columns] = df[categorical_columns].fillna(df[categorical_columns].mode().iloc[0])

    return df
22
+
23
@st.cache_resource
def load_models() -> dict:
    """
    Loads the trained models for prediction.

    Returns:
        A dict mapping each model's display name to the object
        deserialized from its `.pkl` file under `./models/`.
    """
    # (display name, file path) pairs, in the order they are loaded and
    # offered to the user.
    model_files = (
        ('Gaussian Naive Bayes', './models/GaussianNB.pkl'),
        ('Logistic Regression', './models/LogisticRegression.pkl'),
        ('Random Forest', './models/RandomForests.pkl'),
        ('Support Vector Machines', './models/SVM.pkl'),
        ('XG Boost', './models/XGboost.pkl'),
    )

    return {label: load(path) for label, path in model_files}
43
+
44
def prediction(model, age: int, gender: str,
               smoke_status: str, lung_capacity: float,
               disease_type: str, treatment_type: str,
               hospital_visits: int
               ) -> int:
    """
    Encodes one patient's answers as a feature row and runs `model` on it.

    The row has 13 columns in this fixed order: Age, Hospital Visits,
    Lung Capacity, Gender (1 for "Male", else 0), Smoking Status (1 for
    "Yes", else 0), five one-hot `Disease Type_*` columns and three
    one-hot `Treatment Type_*` columns.

    Args:
        model: Any object exposing `predict(array)`; it receives a
            `(1, 13)` NumPy array.
        age: Patient age.
        gender: "Male" sets the Gender flag; any other value clears it.
        smoke_status: "Yes" sets the Smoking Status flag; any other value
            clears it.
        lung_capacity: Measured lung capacity.
        disease_type: Category name (e.g. "COPD") or the full dummy-column
            name (e.g. "Disease Type_COPD").
        treatment_type: Category name (e.g. "Surgery") or the full
            dummy-column name (e.g. "Treatment Type_Surgery").
        hospital_visits: Number of hospital visits.

    Returns:
        The first element of `model.predict(...)`, converted to a native
        Python scalar via `.item()`.
    """
    disease_levels = ('Asthma', 'Bronchitis', 'COPD', 'Lung Cancer', 'Pneumonia')
    treatment_levels = ('Medication', 'Surgery', 'Therapy')

    features = {
        'Age': [age],
        'Hospital Visits': [hospital_visits],
        'Lung Capacity': [lung_capacity],
        'Gender': [1 if gender == "Male" else 0],
        'Smoking Status': [1 if smoke_status == "Yes" else 0],
    }

    # One-hot encode by exact match. The previous substring test
    # (`value in 'Disease Type_X'`) also fired for partial or empty
    # strings, which could set several dummy columns at once.
    for level in disease_levels:
        column = f'Disease Type_{level}'
        features[column] = [1 if disease_type in (level, column) else 0]

    for level in treatment_levels:
        column = f'Treatment Type_{level}'
        features[column] = [1 if treatment_type in (level, column) else 0]

    df_input = pd.DataFrame(features)

    # The model is fed a plain array, so the column order above matters.
    input_arr = np.array(df_input)

    predicted = model.predict(input_arr)[0]

    return predicted.item()
73
+
74
def _plot_histogram(ax, df: pd.DataFrame, column: str) -> None:
    """Draw a histogram of `df[column]` on `ax`, labelled with the column name."""
    ax.hist(df[column])
    ax.set_xlabel(column)
    ax.set_ylabel('Frequency')
    ax.set_title(f'{column} Distribution')


def _plot_grouped_counts(ax, df: pd.DataFrame, index_col: str, group_col: str,
                         title: str, legend_title: str) -> None:
    """Draw side-by-side bars on `ax` counting rows per (`index_col`, `group_col`) pair."""
    counts = df.groupby([index_col, group_col]).size().unstack(fill_value=0)
    counts.plot(kind='bar', stacked=False, ax=ax)
    ax.set_xlabel(index_col)
    ax.set_ylabel('Count')
    ax.set_title(title)
    ax.legend(title=legend_title)


def _render_data_tab(df: pd.DataFrame) -> None:
    """Render the 'About Data' tab: the raw table plus a description of each column."""
    st.header('About the Data')
    st.caption('In this tab, we will explore the particular details about our data.')

    st.caption('Take a look at the data table.')
    st.dataframe(df)

    col1, col2 = st.columns(2)

    with col1:
        st.caption('This dataset captures detailed information about patients suffering from various lung conditions. It includes:')
        st.caption('**Age & Gender**: Patient demographics to understand the spread across age groups and gender.')
        st.caption('**Smoking Status**: Whether the patient is a smoker or non-smoker.')
        st.caption('**Lung Capacity**: Measured lung function to assess disease severity.')
        st.caption('**Disease Type**: The specific lung condition, like COPD or Bronchitis.')

    with col2:
        st.caption('**Treatment Type**: Different treatments patients received, including therapy, medication, or surgery.')
        st.caption('**Hospital Visits**: Number of visits to the hospital for managing the condition.')
        st.caption('**Recovery Status**: Indicates whether the patient recovered after treatment.')

    url = 'https://www.kaggle.com/datasets/samikshadalvi/lungs-diseases-dataset'
    st.caption('For more details, check out the original [source](%s) of the dataset.' % url)


def _render_prediction_tab(df: pd.DataFrame) -> None:
    """Render the 'Prediction' tab: collect patient inputs and show the chosen model's verdict."""
    st.header('Prediction Tab')
    st.caption('In this tab, our ML models will predict if you will recover based on your data.')

    models = load_models()

    model = st.selectbox('Select preferred model for prediction', list(models))
    model_predictor = models[model]

    col1, col2 = st.columns(2)

    with col1:
        age = st.number_input('What is your age?', min_value=0, max_value=100)
        # Choices come from the dataset so they match the values seen in the data.
        gender = st.radio('What is your gender?', df['Gender'].unique())
        disease = st.selectbox('What is your lung condition?', df['Disease Type'].unique())
        treatment = st.selectbox('Which treatment did you receive?', df['Treatment Type'].unique())

    with col2:
        visits = st.number_input('How many times do you visit the hospital? (Annually)', min_value=0, max_value=365)
        capacity = st.slider('What is your lung capacity?', min_value=1.00, max_value=df['Lung Capacity'].max()+5)
        smoke = st.radio('Do you smoke?', ['Yes', 'No'])

    if st.button('Predict!'):
        pred = prediction(model_predictor, age, gender, smoke, capacity, disease, treatment, visits)
        rec = 'Recovered!' if pred == 1 else 'I am sorry.'
        st.header(rec)


def _render_plots_tab(df: pd.DataFrame) -> None:
    """Render the 'Plots' sub-tab: a 4x2 grid of histograms and grouped count bars."""
    st.title('Feature Distribution and Relationships')
    st.caption('In this tab we will see the feature distributions of the dataset.')
    st.caption('We can see the relationships of the features among each other.')

    fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(15, 25))

    _plot_histogram(axes[0, 0], df, 'Age')
    _plot_histogram(axes[0, 1], df, 'Lung Capacity')
    _plot_histogram(axes[1, 0], df, 'Hospital Visits')

    _plot_grouped_counts(axes[1, 1], df, 'Gender', 'Recovered',
                         'Gender Count by Recovery', 'Recovered')
    _plot_grouped_counts(axes[2, 0], df, 'Smoking Status', 'Recovered',
                         'Smoking Status by Recovery', 'Recovered')
    _plot_grouped_counts(axes[2, 1], df, 'Disease Type', 'Recovered',
                         'Disease Type by Recovery', 'Recovered')
    _plot_grouped_counts(axes[3, 0], df, 'Treatment Type', 'Recovered',
                         'Treatment Type by Recovery', 'Recovered')
    _plot_grouped_counts(axes[3, 1], df, 'Disease Type', 'Treatment Type',
                         'Disease Type by Treatment Type', 'Treatment')

    # Lay out BEFORE rendering; calling tight_layout after st.pyplot has no
    # effect on the image that was already sent to the page.
    fig.tight_layout()
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # pyplot does not accumulate one open figure per rerun.
    plt.close(fig)


def _render_custom_plot_tab(df: pd.DataFrame) -> None:
    """Render the 'Custom Plot' sub-tab: let the user chart any two distinct columns."""
    x = st.selectbox("Choose X for plotting.", tuple(df.columns))
    # Y excludes the column already chosen for X.
    y = st.selectbox("Choose Y for plotting.", tuple(df.drop(x, axis=1).columns))

    plot = st.selectbox("Select type of plot.", ("Scatter", "Bar", "Line"))

    if st.button("Plot X and Y!"):
        if plot == "Scatter":
            st.scatter_chart(
                data=df,
                x=x,
                y=y,
                size='Recovered'
            )
        elif plot == "Bar":
            st.bar_chart(
                data=df,
                x=x,
                y=y
            )
        elif plot == "Line":
            st.line_chart(
                data=df,
                x=x,
                y=y
            )


def main():
    """
    Builds the Streamlit page for the Lung Disease Recovery Predictor.

    The page has three tabs: 'About Data' (dataset table and column
    descriptions), 'Prediction' (input form plus model selection) and
    'Data Viz' (predefined plots and a user-configurable chart). The
    dataset is loaded once via `load_data()` and shared by all tabs.
    """
    st.header("Lung Disease Recovery Predictor")
    st.caption('Prepared by `hydraadra112` | John Manuel Carado')

    data_tab, pred_tab, data_viz = st.tabs(['About Data', 'Prediction', 'Data Viz'])
    df = load_data()

    with data_tab:
        _render_data_tab(df)

    with pred_tab:
        _render_prediction_tab(df)

    with data_viz:
        st.title('Data Viz Tab')
        st.caption('In this tab, we can visualize the relationships among our data.')
        st.caption('See our pre-existing plots and you can also plot your own!')

        dviz_tab1, dviz_tab2 = st.tabs(['Plots', 'Custom Plot'])

        with dviz_tab1:
            _render_plots_tab(df)

        with dviz_tab2:
            _render_custom_plot_tab(df)
236
+
237
# Script entry point: build the page only when this file is executed
# directly (e.g. via `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    main()
model.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
models/LogisticRegression.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca87fd0d34064c7b64bb3a7dac630210a7f8863f0f801818b3c474f1de89a71a
3
  size 975
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede5d2d39b97307cf0645d445fd88119eb980d8d850626eb92cb31945bab01f2
3
  size 975
models/RandomForests.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e3ab0c16355bbfefae6ea3cb5b0eee967d37220acba84799db5751c87556cf1
3
- size 46849593
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7a94e320fd2a2a93f908b6dfc63d3066e86eb4682298d9b3e279a951a2f7fd3
3
+ size 46939833