Spaces:

hydraadra112
/

Lung_Disease_Recovery_Predictor

Sleeping

App Files Files Community

hydraadra112 commited on Mar 5

Commit

583b3ab

1 Parent(s): c2f6ae2

Created app

Browse files

Files changed (4) hide show

app.py +238 -0
model.ipynb +0 -0
models/LogisticRegression.pkl +1 -1
models/RandomForests.pkl +2 -2

app.py ADDED Viewed

	@@ -0,0 +1,238 @@

+import streamlit as st
+import pandas as pd
+from joblib import load
+import numpy as np
+import matplotlib.pyplot as plt
+@st.cache_resource
+def load_data() -> pd.DataFrame:
+    """
+    Loads the `.csv` data using pandas
+    """
+    df = pd.read_csv('./lung_disease_data.csv')
+    numerical_columns = ['Age', 'Lung Capacity', 'Hospital Visits']
+    df[numerical_columns] = df[numerical_columns].fillna(df[numerical_columns].mean())
+    # Impute categorical features with their mode (most frequent value)
+    categorical_columns = ['Gender', 'Smoking Status', 'Disease Type', 'Treatment Type', 'Recovered']
+    df[categorical_columns] = df[categorical_columns].fillna(df[categorical_columns].mode().iloc[0])
+    return df
+@st.cache_resource
+def load_models() -> dict:
+    """
+    Loads the trained models for prediction.
+    """
+    nb = load('./models/GaussianNB.pkl')
+    lg = load('./models/LogisticRegression.pkl')
+    rf = load('./models/RandomForests.pkl')
+    svm = load('./models/SVM.pkl')
+    xgb = load('./models/XGboost.pkl')
+    models = dict({
+        'Gaussian Naive Bayes': nb,
+        'Logistic Regression': lg,
+        'Random Forest': rf,
+        'Support Vector Machines': svm,
+        'XG Boost': xgb
+    })
+    return models
+def prediction(model, age: int, gender: str,
+               smoke_status: str, lung_capacity: float,
+               disease_type: str, treatment_type: str,
+               hospital_visits: int
+               ) -> int:
+    df_input = pd.DataFrame(
+        {'Age': [age],
+         'Hospital Visits': [hospital_visits],
+         'Lung Capacity': [lung_capacity],
+         'Gender': [1 if gender == "Male" else 0],
+         'Smoking Status': [1 if smoke_status == "Yes" else 0],
+         'Disease Type_Asthma': [1 if disease_type in 'Disease Type_Asthma' else 0],
+         'Disease Type_Bronchitis': [1 if disease_type in 'Disease Type_Bronchitis' else 0],
+        'Disease Type_COPD': [1 if disease_type in 'Disease Type_COPD' else 0],
+        'Disease Type_Lung Cancer': [1 if disease_type in 'Disease Type_Lung Cancer' else 0],
+        'Disease Type_Pneumonia': [1 if disease_type in 'Disease Type_Pneumonia' else 0],
+        'Treatment Type_Medication': [1 if treatment_type in 'Treatment Type_Medication' else 0],
+        'Treatment Type_Surgery': [1 if treatment_type in 'Treatment Type_Surgery' else 0],
+        'Treatment Type_Therapy': [1 if treatment_type in 'Treatment Type_Therapy' else 0]
+         }
+    )
+    input_arr = np.array(df_input)
+    prediction = model.predict(input_arr)[0]
+    return prediction.item()
+def main():
+    st.header("Lung Disease Recovery Predictor")
+    st.caption('Prepared by `hydraadra112` | John Manuel Carado')
+    data_tab, pred_tab, data_viz = st.tabs(['About Data', 'Prediction', 'Data Viz'])
+    df = load_data()
+    with data_tab:
+        st.header('About the Data')
+        st.caption('In this tab, we will explore the particular details about our data.')
+        st.caption('Take a look at the data table.')
+        st.dataframe(df)
+        col1, col2 = st.columns(2)
+        with col1:
+            st.caption('This dataset captures detailed information about patients suffering from various lung conditions. It includes:')
+            st.caption('**Age & Gender**: Patient demographics to understand the spread across age groups and gender.')
+            st.caption('**Smoking Status**: Whether the patient is a smoker or non-smoker.')
+            st.caption('**Lung Capacity**: Measured lung function to assess disease severity.')
+        st.caption('**Disease Type**: The specific lung condition, like COPD or Bronchitis.')
+        with col2:
+            st.caption('**Treatment Type**: Different treatments patients received, including therapy, medication, or surgery.')
+            st.caption('**Hospital Visits**: Number of visits to the hospital for managing the condition.')
+            st.caption('**Recovery Status**: Indicates whether the patient recovered after treatment.')
+        url = 'https://www.kaggle.com/datasets/samikshadalvi/lungs-diseases-dataset'
+        st.caption('For more details, check out the the original [source](%s) of the dataset.' % url)
+    with pred_tab:
+        st.header('Prediction Tab')
+        st.caption('In this tab, our ML models will predict if you will recover based on your data.')
+        models = load_models()
+        model = st.selectbox('Select preferred model for prediction', models.keys())
+        model_predictor = models[model]
+        col1, col2 = st.columns(2)
+        with col1:
+            # age
+            age = st.number_input('What is your age?', min_value=0, max_value=100)
+            gender = st.radio('What is your gender?', df['Gender'].unique())
+            disease = st.selectbox('What is your lung condition?', df['Disease Type'].unique())
+            treatment = st.selectbox('Which treatment did you receive?', df['Treatment Type'].unique())
+        with col2:
+            visits = st.number_input('How many times do you visit the hospital? (Annually)', min_value=0, max_value=365)
+            capacity = st.slider('What is your lung capacity?', min_value=1.00, max_value=df['Lung Capacity'].max()+5)
+            smoke = st.radio('Do you smoke?', ['Yes', 'No'])
+        if st.button('Predict!'):
+            pred = prediction(model_predictor, age, gender, smoke, capacity, disease, treatment, visits)
+            rec = 'Recovered!' if pred == 1 else 'I am sorry.'
+            st.header(rec)
+    with data_viz:
+        st.title('Data Viz Tab')
+        st.caption('In this tab, we can visualize the relationships among our data.')
+        st.caption('See our pre-existing plots and you can also plot your own!')
+        dviz_tab1, dviz_tab2 = st.tabs(['Plots', 'Custom Plot'])
+        with dviz_tab1:
+            st.title('Feature Distribution and Relationships')
+            st.caption('In this tab we will see the feature distributions of the dataset.')
+            st.caption('We can see the relationships of the features among each other.')
+            # Create subplots
+            fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(15, 25))
+            # Age distribution
+            axes[0, 0].hist(df['Age'])
+            axes[0, 0].set_xlabel('Age')
+            axes[0, 0].set_ylabel('Frequency')
+            axes[0, 0].set_title('Age Distribution')
+            # Lung Capacity distribution
+            axes[0, 1].hist(df['Lung Capacity'])
+            axes[0, 1].set_xlabel('Lung Capacity')
+            axes[0, 1].set_ylabel('Frequency')
+            axes[0, 1].set_title('Lung Capacity Distribution')
+            # Hospital Visits distribution
+            axes[1, 0].hist(df['Hospital Visits'])
+            axes[1, 0].set_xlabel('Hospital Visits')
+            axes[1, 0].set_ylabel('Frequency')
+            axes[1, 0].set_title('Hospital Visits Distribution')
+            # Gender vs Recovered
+            count_data = df.groupby(['Gender', 'Recovered']).size().unstack(fill_value=0)
+            count_data.plot(kind='bar', stacked=False, ax=axes[1, 1])
+            axes[1, 1].set_xlabel('Gender')
+            axes[1, 1].set_ylabel('Count')
+            axes[1, 1].set_title('Gender Count by Recovery')
+            axes[1, 1].legend(title='Recovered')
+            # Smoking vs Recovered
+            count_data = df.groupby(['Smoking Status', 'Recovered']).size().unstack(fill_value=0)
+            count_data.plot(kind='bar', stacked=False, ax=axes[2, 0])
+            axes[2, 0].set_xlabel('Smoking Status')
+            axes[2, 0].set_ylabel('Count')
+            axes[2, 0].set_title('Smoking Status by Recovery')
+            axes[2, 0].legend(title='Recovered')
+            # Disease Type vs Recovered
+            count_data = df.groupby(['Disease Type', 'Recovered']).size().unstack(fill_value=0)
+            count_data.plot(kind='bar', stacked=False, ax=axes[2, 1])
+            axes[2, 1].set_xlabel('Disease Type')
+            axes[2, 1].set_ylabel('Count')
+            axes[2, 1].set_title('Disease Type by Recovery')
+            axes[2, 1].legend(title='Recovered')
+            # Treatment Type vs Recovered
+            count_data = df.groupby(['Treatment Type', 'Recovered']).size().unstack(fill_value=0)
+            count_data.plot(kind='bar', stacked=False, ax=axes[3, 0])
+            axes[3, 0].set_xlabel('Treatment Type')
+            axes[3, 0].set_ylabel('Count')
+            axes[3, 0].set_title('Treatment Type by Recovery')
+            axes[3, 0].legend(title='Recovered')
+            # Disease Type vs Treatment Type
+            count_data = df.groupby(['Disease Type', 'Treatment Type']).size().unstack(fill_value=0)
+            count_data.plot(kind='bar', stacked=False, ax=axes[3, 1])
+            axes[3, 1].set_xlabel('Disease Type')
+            axes[3, 1].set_ylabel('Count')
+            axes[3, 1].set_title('Disease Type by Treatment Type')
+            axes[3, 1].legend(title='Treatment')
+            st.pyplot(fig)
+            plt.tight_layout()
+        with dviz_tab2:
+            x = st.selectbox("Choose X for plotting.", tuple(df.columns))
+            y = st.selectbox("Choose Y for plotting.", tuple(df.drop(x, axis=1).columns))
+            plot = st.selectbox("Select type of plot.", ("Scatter", "Bar", "Line"))
+            if st.button("Plot X and Y!"):
+                if plot == "Scatter":
+                    st.scatter_chart(
+                        data=df,
+                        x=x,
+                        y=y,
+                        size='Recovered'
+                    )
+                elif plot == "Bar":
+                    st.bar_chart(
+                        data=df,
+                        x=x,
+                        y=y
+                    )
+                elif plot == "Line":
+                    st.line_chart(
+                        data=df,
+                        x=x,
+                        y=y
+                        )
+if __name__ == "__main__":
+    main()

model.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

models/LogisticRegression.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca87fd0d34064c7b64bb3a7dac630210a7f8863f0f801818b3c474f1de89a71a
 size 975

 version https://git-lfs.github.com/spec/v1
+oid sha256:ede5d2d39b97307cf0645d445fd88119eb980d8d850626eb92cb31945bab01f2
 size 975

models/RandomForests.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e3ab0c16355bbfefae6ea3cb5b0eee967d37220acba84799db5751c87556cf1
-size 46849593

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7a94e320fd2a2a93f908b6dfc63d3066e86eb4682298d9b3e279a951a2f7fd3
+size 46939833