from io import BytesIO, StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Name of the label column the uploaded CSV must contain.
TARGET_COLUMN = 'target'

# Display name -> zero-argument factory. Only the selected estimator is
# instantiated, and the selectbox below reads its options from these keys so
# the two lists cannot drift apart.
MODEL_FACTORIES = {
    'Logistic Regression': lambda: LogisticRegression(random_state=42),
    'Decision Tree': lambda: DecisionTreeClassifier(random_state=42),
    'Random Forest': lambda: RandomForestClassifier(random_state=42),
}


# Function for loading and processing the dataset
def load_and_process_data(file):
    """Read a CSV, render its correlation heatmap, and summarise it.

    Args:
        file: Anything ``pd.read_csv`` accepts (a path or a file-like object
            such as the value returned by ``st.file_uploader``).

    Returns:
        A tuple ``(df, buffer, missing_values)`` where ``df`` is the parsed
        DataFrame, ``buffer`` is a two-item list holding the text of
        ``df.info()`` and of ``df.describe()``, and ``missing_values`` is the
        per-column null count rendered as a string.

    Side effects:
        Draws the correlation heatmap into the Streamlit page.
    """
    # Rewind file-like inputs so a buffer that was already consumed is
    # parsed from the start instead of yielding an empty frame.
    if hasattr(file, 'seek'):
        file.seek(0)
    df = pd.read_csv(file)

    # df.info() prints to stdout and returns None; capture its text instead.
    info_stream = StringIO()
    df.info(buf=info_stream)
    buffer = [info_stream.getvalue(), df.describe().to_string()]

    # Correlation matrix. Only numeric/bool columns are used because
    # DataFrame.corr() raises on string columns in recent pandas versions.
    corr_matrix = df.select_dtypes(include=['number', 'bool']).corr()
    if not corr_matrix.empty:
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Matrix')
        st.pyplot(fig)
        # Release the figure; otherwise figures pile up across script reruns.
        plt.close(fig)

    # Check for missing values
    missing_values = df.isnull().sum()

    return df, buffer, missing_values.to_string()


# Function for training and evaluating models
def train_and_evaluate_model(file, model_choice):
    """Train the selected classifier on the uploaded data and evaluate it.

    Args:
        file: CSV source forwarded to ``load_and_process_data``.
        model_choice: One of the keys of ``MODEL_FACTORIES``.

    Returns:
        A tuple ``(report, buffer, missing_values)``: the text classification
        report for the held-out 20% split, plus the ``buffer`` and
        ``missing_values`` produced by ``load_and_process_data``.

    Raises:
        KeyError: If the data has no ``target`` column or ``model_choice`` is
            not a known model name.

    Side effects:
        Draws the confusion-matrix heatmap into the Streamlit page.
    """
    df, buffer, missing_values = load_and_process_data(file)

    if TARGET_COLUMN not in df.columns:
        # Same exception type pandas would raise, with an actionable message.
        raise KeyError(
            f"Uploaded data has no '{TARGET_COLUMN}' column; "
            f"found columns: {list(df.columns)}"
        )

    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardize the features; the scaler is fitted on the training split
    # only and then applied to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Build only the requested model
    model = MODEL_FACTORIES[model_choice]()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Classification report
    report = classification_report(y_test, y_pred)

    # Confusion matrix plot
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {model_choice}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    plt.close(fig)

    return report, buffer, missing_values


# Streamlit app
st.title("Heart Disease Model Training")

# File upload
file = st.file_uploader("Upload CSV File", type=["csv"])

if file is not None:
    # Model selection
    model_choice = st.selectbox("Choose a Model", list(MODEL_FACTORIES))

    if st.button("Run Model"):
        # Train and evaluate the model
        report, buffer, missing_values = train_and_evaluate_model(file, model_choice)

        # Display the results
        st.subheader("Model Results and Data Info")
        st.text(f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}")