Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import plotly.express as px | |
import numpy as np | |
import xgboost as xgb | |
import os | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix | |
# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(
    layout="wide",
    page_title="Healthcare Dashboard",
    page_icon="π‘",
)
# Human-readable label for each predicted class; the class code is the
# position of the label in the tuple below (0 through 4).
OUTCOME_MAP = dict(
    enumerate(
        (
            "Patient recovered and went home",
            "Transferred to another hospital or care center",
            "Transferred to a rehab center",
            "Left the hospital early without approval",
            "Passed away or had a very serious event",
        )
    )
)
# Function to load the model
def load_model(model_path="xgboost_patient_model.json"):
    """Load the pre-trained XGBoost classifier from disk.

    Args:
        model_path: Path of the saved model file. Defaults to
            ``"xgboost_patient_model.json"``, so existing zero-argument
            calls keep loading the same file.

    Returns:
        The loaded ``xgb.XGBClassifier``, or ``None`` when construction or
        loading raised. In that case the error is reported on the page with
        ``st.error`` instead of propagating.
    """
    try:
        model = xgb.XGBClassifier()
        model.load_model(model_path)
    except Exception as e:
        # UI boundary: show the failure and let the caller handle ``None``.
        st.error(f"Error loading model: {e}")
        return None
    return model
# Ensure data matches model's feature requirements
def preprocess_data(data, model_features):
    """Coerce ``data`` to numeric values and select the model's features.

    Args:
        data: DataFrame holding the candidate feature columns.
        model_features: Iterable of the column names the model expects. The
            returned frame has exactly these columns, in this order.

    Returns:
        The numeric frame restricted to ``model_features``, or ``None`` when a
        required feature is missing or any step raises (an ``st.error`` is
        shown in both cases). Columns not in ``model_features`` only trigger
        an ``st.warning`` and are dropped from the result.
    """
    try:
        # errors='coerce' turns non-numeric cells into NaN instead of raising.
        numeric = data.apply(pd.to_numeric, errors='coerce')

        expected = list(model_features)
        expected_lookup = set(expected)
        available_lookup = set(numeric.columns)

        missing_features = [f for f in expected if f not in available_lookup]
        extra_features = [f for f in numeric.columns if f not in expected_lookup]

        if missing_features:
            st.error(f"β Missing required features: {missing_features}")
            return None
        if extra_features:
            # The icon was a mis-encoded warning sign; restored here.
            st.warning(f"⚠️ Extra features in uploaded data: {extra_features}")
        return numeric[expected]
    except Exception as e:
        st.error(f"Data preprocessing error: {e}")
        return None
# Predict patient outcomes
def predict_outcome(model, data):
    """Predict an outcome class for every row of ``data``.

    Args:
        model: Loaded classifier exposing ``get_booster().feature_names`` and
            ``predict``, or ``None``.
        data: DataFrame of patient features. An optional ``"target"`` column
            is taken as the ground-truth class and excluded from the features.
            The caller's DataFrame is left unmodified.

    Returns:
        A 4-tuple ``(actual_target, predictions, mapped_predictions,
        actual_labels)``:

        * ``actual_target`` - the ``"target"`` column, or ``None`` if absent.
        * ``predictions`` - raw class codes returned by ``model.predict``.
        * ``mapped_predictions`` - ``OUTCOME_MAP`` label per prediction.
        * ``actual_labels`` - ``OUTCOME_MAP`` label per ground-truth value, or
          ``"N/A"`` per row when there is no ``"target"`` column.

        All four items are ``None`` when ``model`` is ``None``, preprocessing
        fails, or prediction raises (the error is shown with ``st.error``).
    """
    if model is None:
        return None, None, None, None

    # Separate the ground truth without popping it from the caller's frame.
    if "target" in data.columns:
        actual_target = data["target"]
        features = data.drop(columns=["target"])
    else:
        actual_target = None
        features = data

    def label_for(code):
        # An unexpected class code must not abort the whole batch with KeyError.
        return OUTCOME_MAP.get(code, f"Unknown outcome ({code})")

    try:
        model_features = model.get_booster().feature_names
        features = preprocess_data(features, model_features)
        if features is None:
            return None, None, None, None

        predictions = model.predict(features)

        # Convert numerical predictions to human-readable labels
        mapped_predictions = [label_for(pred) for pred in predictions]
        if actual_target is not None:
            actual_labels = [label_for(actual) for actual in actual_target]
        else:
            actual_labels = ["N/A"] * len(predictions)

        # Debugging information
        if actual_target is not None:
            total_predictions = len(actual_target)
            # Compare by position so the Series index plays no role.
            correct_predictions = int((predictions == actual_target.to_numpy()).sum())
            if total_predictions:
                accuracy = (correct_predictions / total_predictions) * 100
            else:
                accuracy = 0.0  # nothing to score; avoid dividing by zero
            st.write(f"β Correct Predictions: {correct_predictions}/{total_predictions}")
            st.write(f"π Model Accuracy: **{accuracy:.2f}%**")

        return actual_target, predictions, mapped_predictions, actual_labels
    except Exception as e:
        st.error(f"Prediction error: {e}")
        return None, None, None, None
# Load the dataset that the dashboard sections read from `df`.
file_path = 'final_cleaned_patient_data.csv'
df = pd.DataFrame()  # used as-is when the CSV cannot be read
try:
    df = pd.read_csv(file_path)
except Exception as load_error:
    st.error(f"Error loading data: {load_error}")
# Sidebar: title, team roster, divider and the section picker.
st.sidebar.title('Healthcare Data Dashboard')

# Team roster, rendered as "<position>. <name>" lines.
st.sidebar.markdown("### π Team Members:")
team_members = [
    f"{position}. {name}"
    for position, name in enumerate(
        (
            "R. Sai Somnath",
            "S. Sreevardhan",
            "S. Mohammad Basha",
            "V. Hussain Basha",
            "P. Charles",
        ),
        start=1,
    )
]
for member in team_members:
    st.sidebar.text(member)

# Visual separator between the roster and the navigation control.
st.sidebar.markdown("---")

# `option` holds the name of the section the rest of the script renders.
section_names = [
    'Data Overview',
    'Data Visualization',
    'Interactive Reports',
    'Correlation Analysis',
    'Data Insights',
    'Patient Outcome Prediction',
    'Batch Prediction',
    'Model Performance',
]
option = st.sidebar.selectbox('Choose a section', section_names)
# Dark-background theme, injected as raw CSS through st.markdown.
dark_theme_css = """
<style>
h1 { color: #00FFAA; }
.stApp { background-color: #121212; color: #FFFFFF; }
.sidebar .sidebar-content { background-color: #333333; color: #FFFFFF; }
.css-1d391kg { color: #FFFFFF; }
.css-18e3th9 { background-color: #1E1E1E; }
</style>
"""
st.markdown(dark_theme_css, unsafe_allow_html=True)
# Data Overview Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Data Overview':
    st.title('π Data Overview')
    st.write(df.head())
    st.write(f"Dataset Shape: {df.shape}")
    st.write(f"Column Names: {df.columns.tolist()}")
    st.write("Basic Statistical Overview:")
    if df.shape[1] == 0:
        # The loader falls back to a column-less DataFrame when the CSV cannot
        # be read, and DataFrame.describe() raises ValueError on such a frame.
        st.info("No columns available to summarise.")
    else:
        st.write(df.describe())
    if st.checkbox('Show Missing Values'):
        st.write(df.isnull().sum())
# Data Visualization Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Data Visualization':
    st.title('π Data Visualization')
    column = st.selectbox('Select Column for Visualization', df.columns)
    plot_type = st.radio(
        'Choose plot type',
        ['Histogram', 'Boxplot', 'Violin Plot', 'Scatter Plot', 'Line Plot', 'Animated Plot'],
    )

    # Chart types that need nothing beyond the selected column. The lambdas
    # defer construction so only the chosen chart is built.
    single_column_charts = {
        'Histogram': lambda: px.histogram(df, x=column, marginal='box', nbins=30),
        'Boxplot': lambda: px.box(df, y=column),
        'Violin Plot': lambda: px.violin(df, y=column, box=True, points='all'),
    }

    if plot_type in single_column_charts:
        fig = single_column_charts[plot_type]()
    elif plot_type == 'Scatter Plot':
        x_col = st.selectbox('Select X axis', df.columns)
        fig = px.scatter(df, x=x_col, y=column, color=column)
    elif plot_type == 'Line Plot':
        x_col = st.selectbox('Select X axis for Line Plot', df.columns)
        fig = px.line(df, x=x_col, y=column)
    elif plot_type == 'Animated Plot':
        time_col = st.selectbox('Select Time Column (if applicable)', df.columns)
        # The selected column is plotted on both axes, one frame per time value.
        fig = px.scatter(df, x=column, y=column, animation_frame=time_col, size_max=60)

    st.plotly_chart(fig)
# Correlation Analysis Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Correlation Analysis':
    st.title('π Correlation Analysis')
    # Restrict to numeric/bool columns explicitly: recent pandas versions raise
    # when DataFrame.corr() meets a non-numeric column instead of dropping it.
    numeric_df = df.select_dtypes(include=['number', 'bool'])
    if numeric_df.shape[1] == 0:
        # Nothing to correlate (for example, the dataset failed to load).
        st.warning("No numeric columns available for correlation analysis.")
    else:
        corr_matrix = numeric_df.corr()
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
        st.pyplot(fig)
        # Release the figure; the script reruns on every interaction and
        # unclosed pyplot figures accumulate.
        plt.close(fig)
# Interactive Reports Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Interactive Reports':
    st.title('π Interactive Reports')
    st.write("Filter and explore the data.")

    selected_columns = st.multiselect('Select columns to display', df.columns)
    st.dataframe(df[selected_columns] if selected_columns else df)

    st.write("Filter the Data:")
    filter_column = st.selectbox('Select column to filter by', df.columns)
    filter_value = st.text_input('Enter filter value')

    # filter_column is None when the dataset has no columns to choose from.
    if filter_value and filter_column is not None:
        # regex=False: the text box holds a literal substring. Treating it as
        # a pattern makes input such as "(" or "*" raise a regex error.
        matches = df[filter_column].astype(str).str.contains(
            filter_value, case=False, regex=False
        )
        filtered_data = df[matches]
        st.write(filtered_data)

        # Download option
        csv_data = filtered_data.to_csv(index=False).encode('utf-8')
        st.download_button(
            label='Download Filtered Data as CSV',
            data=csv_data,
            file_name='filtered_data.csv',
            mime='text/csv',
        )
# Data Insights Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Data Insights':
    st.title('π§ Data Insights')
    st.write("Gain insights into the data using various metrics.")

    st.write("Total Unique Values per Column:")
    unique_counts = df.nunique()
    st.write(unique_counts)

    st.write("Top 5 Frequent Values for Each Column:")
    for col in df.columns:
        # The five most frequent values, rendered through the Series' text form.
        top_values = df[col].value_counts().head(5)
        st.write(f"{col}: {top_values}")
# Patient Outcome Prediction Section
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Patient Outcome Prediction':
    st.title('π€ Patient Outcome Prediction')

    # Load the pre-trained model
    model = load_model()

    if model is not None:
        st.success("β Pre-trained model loaded successfully!")

        # Class descriptions shown in this section.
        # NOTE(review): the wording differs from OUTCOME_MAP used for batch
        # predictions - confirm whether the two should be unified.
        class_descriptions = {
            0: "Patient recovered and went home",
            1: "Patient transferred to another hospital",
            2: "Patient moved to rehab facility",
            3: "Patient left against medical advice",
            4: "Patient deceased or serious outcome",
        }

        # Display target class distribution if target column exists
        target_column = 'target'
        if target_column in df.columns:
            st.subheader("Target Class Distribution")
            target_counts = df[target_column].value_counts().reset_index()
            target_counts.columns = ['Class', 'Count']
            target_counts['Description'] = target_counts['Class'].map(class_descriptions)
            st.write(target_counts)
            fig = px.pie(target_counts, values='Count', names='Description', title='Target Class Distribution')
            st.plotly_chart(fig)

        # Prediction interface
        st.subheader("Make Predictions")
        st.write("Enter values for the features to predict the patient outcome:")

        def _yes_no(flag):
            # Shared display label for the binary 0/1 select boxes.
            return "No" if flag == 0 else "Yes"

        # Three columns of input widgets, one entry per model feature.
        col1, col2, col3 = st.columns(3)
        input_values = {}

        with col1:
            input_values['age'] = st.number_input("Age", min_value=0, max_value=120, value=51)
            input_values['gender'] = st.selectbox("Gender", [0, 1], index=1, format_func=lambda x: "Male" if x == 0 else "Female")
            input_values['previous_hospitalizations'] = st.number_input("Previous Hospitalizations", min_value=0, value=4)
            input_values['heart_rate'] = st.number_input("Heart Rate", min_value=30, max_value=200, value=63)
            input_values['respiratory_rate'] = st.number_input("Respiratory Rate", min_value=5, max_value=60, value=16)
            input_values['blood_pressure_sys'] = st.number_input("Blood Pressure (Systolic)", min_value=50, max_value=250, value=86)
            input_values['blood_pressure_dia'] = st.number_input("Blood Pressure (Diastolic)", min_value=30, max_value=150, value=58)
            # The label previously showed a mis-encoded degree sign.
            input_values['temperature'] = st.number_input("Temperature (°C)", min_value=35.0, max_value=42.0, value=35.86, step=0.1)
            input_values['wbc_count'] = st.number_input("WBC Count", min_value=0.0, max_value=50.0, value=7.15, step=0.1)
            input_values['creatinine'] = st.number_input("Creatinine", min_value=0.1, max_value=10.0, value=2.93, step=0.1)

        with col2:
            input_values['bilirubin'] = st.number_input("Bilirubin", min_value=0.1, max_value=30.0, value=1.72, step=0.1)
            input_values['glucose'] = st.number_input("Glucose", min_value=40, max_value=500, value=137)
            input_values['bun'] = st.number_input("BUN", min_value=5, max_value=150, value=36)
            input_values['pH'] = st.number_input("pH", min_value=6.8, max_value=7.8, value=7.34, step=0.01)
            input_values['pao2'] = st.number_input("PaO2", min_value=40, max_value=300, value=72)
            input_values['pco2'] = st.number_input("PCO2", min_value=20, max_value=100, value=58)
            input_values['fio2'] = st.number_input("FiO2", min_value=0.21, max_value=1.0, value=0.88, step=0.01)
            input_values['gcs'] = st.slider("GCS Score", 3, 15, 5)
            input_values['comorbidity_index'] = st.slider("Comorbidity Index", 0, 10, 1)
            input_values['admission_source'] = st.selectbox("Admission Source", [0, 1, 2, 3], index=1, format_func=lambda x: ["Emergency", "OPD", "Transfer", "Other"][x])

        with col3:
            input_values['elective_surgery'] = st.selectbox("Elective Surgery", [0, 1], index=1, format_func=_yes_no)
            input_values['num_medications'] = st.number_input("Number of Medications", min_value=0, value=18)
            input_values['charlson_comorbidity_index'] = st.slider("Charlson Comorbidity Index", 0, 15, 1)
            input_values['ews_score'] = st.slider("EWS Score", 0, 20, 7)
            input_values['severity_score'] = st.slider("Severity Score", 0, 10, 4)
            input_values['bed_occupancy_rate'] = st.slider("Bed Occupancy Rate (%)", 50, 100, int(68.67))
            input_values['staff_to_patient_ratio'] = st.slider("Staff to Patient Ratio", 0.1, 2.0, 0.99, step=0.1)
            input_values['past_icu_admissions'] = st.number_input("Past ICU Admissions", min_value=0, value=2)
            input_values['previous_surgery'] = st.selectbox("Previous Surgery", [0, 1], index=1, format_func=_yes_no)
            input_values['high_risk_treatment'] = st.selectbox("High Risk Treatment", [0, 1], index=1, format_func=_yes_no)
            input_values['discharge_support'] = st.selectbox("Discharge Support", [0, 1], index=0, format_func=_yes_no)

        if st.button("Predict Outcome"):
            # Input columns, in the order handed to the model.
            input_columns = [
                'age', 'gender', 'previous_hospitalizations', 'heart_rate',
                'respiratory_rate', 'blood_pressure_sys', 'blood_pressure_dia',
                'temperature', 'wbc_count', 'creatinine', 'bilirubin', 'glucose', 'bun',
                'pH', 'pao2', 'pco2', 'fio2', 'gcs', 'comorbidity_index',
                'admission_source', 'elective_surgery', 'num_medications',
                'charlson_comorbidity_index', 'ews_score', 'severity_score',
                'bed_occupancy_rate', 'staff_to_patient_ratio', 'past_icu_admissions',
                'previous_surgery', 'high_risk_treatment', 'discharge_support',
            ]

            # Single-row frame built only from the widgets; a feature without a
            # widget defaults to 0. The row does not use `df`, so prediction is
            # no longer blocked when the dataset failed to load.
            sample_input = pd.DataFrame(
                [{col: input_values.get(col, 0) for col in input_columns}]
            )

            # Make prediction
            try:
                prediction = model.predict(sample_input)[0]
                prediction_proba = model.predict_proba(sample_input)[0]

                # Display prediction
                st.subheader("Prediction Result")
                st.write(f"Predicted Class: {prediction} - {class_descriptions.get(prediction, 'Unknown')}")

                # Display probability for each class
                st.write("Prediction Probabilities:")
                proba_df = pd.DataFrame({
                    'Class': [class_descriptions.get(i, f"Class {i}") for i in range(len(prediction_proba))],
                    'Probability': prediction_proba,
                })
                fig = px.bar(proba_df, x='Class', y='Probability', title='Prediction Probabilities')
                st.plotly_chart(fig)
            except Exception as e:
                st.error(f"Error making prediction: {e}")
    else:
        st.error("Failed to load model. Please check if 'xgboost_patient_model.json' exists in the current directory.")
# NEW SECTION 1: Batch Prediction
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Batch Prediction':
    st.title("π₯ Batch Patient Outcome Prediction")
    st.write("Upload a CSV file with patient data to predict outcomes for multiple patients at once.")

    uploaded_file = st.file_uploader("π Upload CSV file", type=["csv"])

    if uploaded_file is not None:
        # A malformed or empty upload must not take the whole page down.
        try:
            batch_df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading uploaded file: {e}")
            batch_df = None

        if batch_df is not None:
            # Rows with any missing value are discarded before prediction.
            batch_df = batch_df.dropna().reset_index(drop=True)

            if batch_df.empty:
                st.warning("The uploaded file has no complete rows to predict on.")
            else:
                st.write("## Preview of Uploaded Data")
                st.dataframe(batch_df.head(), use_container_width=True)

                model = load_model()
                # The fourth item (labels for the actual classes) is not shown
                # in the results table.
                actual_target, predicted_classes, predicted_outcomes, _ = predict_outcome(model, batch_df.copy())

                if predicted_classes is not None:
                    st.write("## π₯ Prediction Results")
                    row_count = len(predicted_classes)
                    result_df = pd.DataFrame({
                        "Patient ID": range(1, row_count + 1),
                        "Actual Class": actual_target if actual_target is not None else ["N/A"] * row_count,
                        "Predicted Class": predicted_classes,
                        "Predicted Outcome": predicted_outcomes,
                    })
                    st.dataframe(result_df, use_container_width=True)

                    # Add visualization of batch prediction results
                    st.write("## Prediction Distribution")
                    results_count = pd.Series(predicted_outcomes).value_counts().reset_index()
                    results_count.columns = ['Predicted Outcome', 'Count']
                    fig = px.pie(results_count, values='Count', names='Predicted Outcome',
                                 title='Distribution of Predicted Outcomes')
                    st.plotly_chart(fig)

                    # Offer download of results
                    csv_results = result_df.to_csv(index=False).encode('utf-8')
                    st.download_button(
                        label="Download Prediction Results",
                        data=csv_results,
                        file_name="patient_predictions.csv",
                        mime="text/csv",
                    )
# NEW SECTION 2: Model Performance
# `option` holds exactly one section name, so a standalone `if` selects the
# same section an `if`/`elif` chain would.
if option == 'Model Performance':
    st.title("π Model Performance Analysis")

    # Check if data exists and contains target variable
    if len(df) > 0 and 'target' in df.columns:
        st.write("Analyze the model's performance on the dataset.")

        # Load the model
        model = load_model()

        if model is not None:
            try:
                # Split data into features and target
                X = df.drop(columns=["target"])
                y = df["target"]

                # The split lives inside the try block because stratify=y
                # raises ValueError when a class has too few members.
                # NOTE(review): whether this 80/20 split (random_state=42)
                # matches the one used to train the saved model cannot be
                # confirmed from this file.
                _, X_test, _, y_test = train_test_split(
                    X, y, test_size=0.2, random_state=42, stratify=y
                )

                # Make predictions
                y_pred = model.predict(X_test)

                # Calculate metrics
                accuracy = accuracy_score(y_test, y_pred)
                conf_matrix = confusion_matrix(y_test, y_pred)
                class_report = classification_report(y_test, y_pred, output_dict=True)

                # Display metrics
                col1, col2 = st.columns(2)

                with col1:
                    st.metric("Model Accuracy", f"{accuracy:.2%}")

                    # Plot confusion matrix
                    st.write("### Confusion Matrix")
                    fig, ax = plt.subplots(figsize=(10, 8))
                    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
                    ax.set_xlabel('Predicted Labels')
                    ax.set_ylabel('True Labels')
                    st.pyplot(fig)
                    # Release the figure so reruns do not accumulate open plots.
                    plt.close(fig)

                with col2:
                    # Plot classification report
                    st.write("### Classification Report")
                    report_df = pd.DataFrame(class_report).transpose()
                    st.dataframe(report_df.style.format({
                        'precision': '{:.2f}',
                        'recall': '{:.2f}',
                        'f1-score': '{:.2f}',
                        'support': '{:.0f}',
                    }))
            except Exception as e:
                st.error(f"Error performing analysis: {e}")
        else:
            st.error("Model could not be loaded. Please check if the model file exists.")
    else:
        st.error("Cannot perform model analysis. Dataset is empty or missing target variable.")

# Sidebar footer, written regardless of which section is selected.
st.sidebar.write("Forecasting discharge outcomes for critically ILL patients using machine learning")