from io import BytesIO, StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier


def load_and_process_data(file):
    """Load a CSV, summarise it, and render its correlation heatmap.

    Args:
        file: Path or file-like object, passed straight to
            ``pandas.read_csv``.

    Returns:
        A ``(df, buffer, missing_values)`` tuple: ``df`` is the loaded
        DataFrame, ``buffer`` is a two-item list holding the ``df.info()``
        text followed by the ``df.describe()`` table, and
        ``missing_values`` is the per-column null count rendered as a
        string.
    """
    df = pd.read_csv(file)

    # DataFrame.info() writes to stdout and returns None, so its text has to
    # be captured through the ``buf`` argument to end up in the summary.
    info_text = StringIO()
    df.info(buf=info_text)
    buffer = [info_text.getvalue(), df.describe().to_string()]

    # Correlate numeric columns only: a text column would otherwise make
    # corr() raise on recent pandas versions.
    corr_matrix = df.select_dtypes(include='number').corr()
    if corr_matrix.empty:
        st.warning('No numeric columns available for a correlation matrix.')
    else:
        # Draw on an explicit Figure and close it afterwards so figures do
        # not pile up in pyplot's global registry on every Streamlit rerun.
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Matrix')
        st.pyplot(fig)
        plt.close(fig)

    missing_values = df.isnull().sum()

    return df, buffer, missing_values.to_string()


def train_and_evaluate_model(file, model_choice):
    """Train the chosen classifier on the CSV data and render its results.

    The data is loaded through ``load_and_process_data`` (which also draws
    the correlation heatmap), split 80/20 into train and test sets,
    standardised, and used to fit the selected model. A confusion-matrix
    heatmap for the test split is rendered in the Streamlit page.

    Args:
        file: Path or file-like CSV source forwarded to
            ``load_and_process_data``.
        model_choice: One of ``'Logistic Regression'``, ``'Decision Tree'``
            or ``'Random Forest'``.

    Returns:
        A ``(report, buffer, missing_values)`` tuple: the text
        classification report for the test split, plus the data-summary
        list and missing-value string produced by
        ``load_and_process_data``.

    Raises:
        KeyError: If the data has no ``target`` column, or if
            ``model_choice`` is not a supported model name.
    """
    df, buffer, missing_values = load_and_process_data(file)

    # Fail with an explicit message instead of pandas' generic
    # "not found in axis" error; the exception type stays KeyError.
    if 'target' not in df.columns:
        raise KeyError("Input data must contain a 'target' column")

    X = df.drop('target', axis=1)
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse its statistics
    # for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Map names to estimator classes so only the requested model is built.
    model_classes = {
        'Logistic Regression': LogisticRegression,
        'Decision Tree': DecisionTreeClassifier,
        'Random Forest': RandomForestClassifier,
    }
    model = model_classes[model_choice](random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    report = classification_report(y_test, y_pred)

    cm = confusion_matrix(y_test, y_pred)

    # Draw on an explicit Figure and close it afterwards so figures do not
    # pile up in pyplot's global registry on every Streamlit rerun.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {model_choice}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    plt.close(fig)

    return report, buffer, missing_values


# Streamlit page: upload a CSV, choose a model, then train and show results.
st.title("Heart Disease Model Training")

file = st.file_uploader("Upload CSV File", type=["csv"])

# Nothing below is rendered until a CSV has been uploaded.
if file is not None:
    model_names = ['Logistic Regression', 'Decision Tree', 'Random Forest']
    model_choice = st.selectbox("Choose a Model", model_names)

    run_requested = st.button("Run Model")
    if run_requested:
        # Training also renders the correlation and confusion-matrix plots.
        results = train_and_evaluate_model(file, model_choice)
        report, buffer, missing_values = results

        st.subheader("Model Results and Data Info")
        results_text = f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}"
        st.text(results_text)
|