# oooooo / app.py
# Rozeeeee's picture
# Create app.py
# 0ab2276 verified
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from io import BytesIO
from PIL import Image
# Function for loading and processing the dataset
def load_and_process_data(file):
    """Load a CSV dataset, render its correlation heatmap, and summarise it.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``
            (the app passes the Streamlit upload directly).

    Returns:
        A 3-tuple ``(df, buffer, missing_values)``:

        * ``df`` -- the loaded DataFrame.
        * ``buffer`` -- a two-item list holding the text of ``df.info()``
          and of ``df.describe()``.
        * ``missing_values`` -- per-column null counts rendered as a string.
    """
    # Local import: the module header only brings in BytesIO from io.
    from io import StringIO

    df = pd.read_csv(file)

    # DataFrame.info() writes to stdout and returns None, so its output has
    # to be captured through the ``buf`` argument to be usable later.
    info_text = StringIO()
    df.info(buf=info_text)
    buffer = [info_text.getvalue(), df.describe().to_string()]

    # Correlate numeric/boolean columns only: text columns cannot be
    # correlated and make DataFrame.corr() raise on pandas >= 2.0.
    corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()

    if corr_matrix.empty:
        # A heatmap of an empty matrix would raise; tell the user instead.
        st.info("No numeric columns available for a correlation matrix.")
    else:
        # Draw on an explicit figure rather than pyplot's implicit global
        # one, and close it afterwards so figures do not accumulate across
        # Streamlit reruns.
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Matrix')
        st.pyplot(fig)
        plt.close(fig)

    # Count missing values per column.
    missing_values = df.isnull().sum()

    return df, buffer, missing_values.to_string()
# Function for training and evaluating models
def train_and_evaluate_model(file, model_choice):
    """Train the selected classifier on the uploaded data and report results.

    The dataset is loaded through ``load_and_process_data``, split 80/20 with
    a fixed seed, standardised with statistics fitted on the training split
    only, and then used to fit and evaluate the chosen model. The confusion
    matrix is rendered in the Streamlit page as a side effect.

    Args:
        file: CSV source forwarded to ``load_and_process_data``.
        model_choice: One of ``'Logistic Regression'``, ``'Decision Tree'``
            or ``'Random Forest'``.

    Returns:
        A 3-tuple ``(report, buffer, missing_values)``: the text
        classification report, followed by the data summary list and the
        missing-value summary returned by ``load_and_process_data``.

    Raises:
        KeyError: If the data has no ``'target'`` column or ``model_choice``
            is not one of the supported model names.
    """
    df, buffer, missing_values = load_and_process_data(file)

    # Fail with a clear message instead of an opaque pandas error when the
    # label column is absent (same exception type as before).
    if 'target' not in df.columns:
        raise KeyError("Uploaded data must contain a 'target' column")

    X = df.drop('target', axis=1)
    y = df['target']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardize the features; the scaler is fitted on the training split
    # only so no information from the test split leaks into it.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Map names to estimator classes so only the requested model is built.
    model_classes = {
        'Logistic Regression': LogisticRegression,
        'Decision Tree': DecisionTreeClassifier,
        'Random Forest': RandomForestClassifier,
    }
    model = model_classes[model_choice](random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Classification report
    report = classification_report(y_test, y_pred)

    # Confusion matrix plot, drawn on an explicit figure that is closed
    # after rendering so figures do not accumulate across Streamlit reruns.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {model_choice}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    plt.close(fig)

    return report, buffer, missing_values
# Streamlit app
st.title("Heart Disease Model Training")

# File upload
file = st.file_uploader("Upload CSV File", type=["csv"])

if file is not None:
    # Model selection
    model_choice = st.selectbox(
        "Choose a Model",
        ['Logistic Regression', 'Decision Tree', 'Random Forest'],
    )

    if st.button("Run Model"):
        # Train and evaluate the model (this also renders the plots).
        report, buffer, missing_values = train_and_evaluate_model(file, model_choice)

        # Display the results
        st.subheader("Model Results and Data Info")
        st.text(f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}")

        # Show the data summaries as well: the heading promises "Data Info".
        # Skip empty/None entries so only real text is rendered.
        summaries = [part for part in buffer if part]
        if summaries:
            st.text("Data Info:\n" + "\n\n".join(summaries))