Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
import streamlit as st
|
6 |
+
from sklearn.model_selection import train_test_split
|
7 |
+
from sklearn.preprocessing import StandardScaler
|
8 |
+
from sklearn.linear_model import LogisticRegression
|
9 |
+
from sklearn.tree import DecisionTreeClassifier
|
10 |
+
from sklearn.ensemble import RandomForestClassifier
|
11 |
+
from sklearn.metrics import confusion_matrix, classification_report
|
12 |
+
from io import BytesIO
|
13 |
+
from PIL import Image
|
14 |
+
|
15 |
+
# Function for loading and processing the dataset
|
16 |
+
def load_and_process_data(file):
    """Load a CSV dataset, plot its correlation heatmap and summarise it.

    Args:
        file: Anything ``pandas.read_csv`` accepts: a path, or a file-like
            object such as the value returned by ``st.file_uploader``.

    Returns:
        A ``(df, buffer, missing_values)`` tuple:

        * ``df`` - the loaded ``DataFrame``.
        * ``buffer`` - a two-item list holding the text of ``df.info()``
          followed by the text of ``df.describe()``.
        * ``missing_values`` - per-column null counts rendered as a string.

    Side effects:
        Renders the correlation heatmap in the Streamlit page.
    """
    import io  # local import: the file header only pulls BytesIO out of io

    # A file-like object that was already consumed would make read_csv fail
    # with "No columns to parse", so rewind it before reading.
    if hasattr(file, "seek"):
        file.seek(0)
    df = pd.read_csv(file)

    # DataFrame.info() writes to stdout and returns None; capture its text
    # through the `buf` argument so the summary actually reaches the caller.
    info_text = io.StringIO()
    df.info(buf=info_text)
    buffer = [info_text.getvalue(), df.describe().to_string()]

    # Correlation matrix: restrict to numeric/bool columns because
    # DataFrame.corr() raises on string columns in recent pandas releases.
    corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()
    if not corr_matrix.empty:
        # Use an explicit figure and close it afterwards so repeated runs do
        # not accumulate open matplotlib figures.
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Matrix')
        st.pyplot(fig)  # Display the correlation matrix directly in Streamlit
        plt.close(fig)

    # Check for missing values
    missing_values = df.isnull().sum()

    return df, buffer, missing_values.to_string()
|
35 |
+
|
36 |
+
# Function for training and evaluating models
|
37 |
+
def train_and_evaluate_model(file, model_choice):
    """Train the selected classifier on the dataset and report its quality.

    The data is loaded through ``load_and_process_data``, split 80/20 with a
    fixed seed, standardised (scaler fitted on the training split only) and
    used to fit the chosen model. The confusion matrix for the test split is
    rendered in the Streamlit page.

    Args:
        file: CSV source forwarded to ``load_and_process_data``.
        model_choice: One of ``'Logistic Regression'``, ``'Decision Tree'``
            or ``'Random Forest'``.

    Returns:
        A ``(report, buffer, missing_values)`` tuple: the text classification
        report for the test split, followed by the ``buffer`` and
        ``missing_values`` values returned by ``load_and_process_data``.

    Raises:
        KeyError: If ``model_choice`` is not a known model name, or the
            dataset has no ``'target'`` column.
    """
    # Map names to estimator classes so only the selected model is built.
    model_factories = {
        'Logistic Regression': LogisticRegression,
        'Decision Tree': DecisionTreeClassifier,
        'Random Forest': RandomForestClassifier,
    }
    # Fail fast, before any data loading or plotting happens.
    if model_choice not in model_factories:
        raise KeyError(
            f"Unknown model choice {model_choice!r}; "
            f"expected one of {sorted(model_factories)}"
        )

    df, buffer, missing_values = load_and_process_data(file)

    if 'target' not in df.columns:
        raise KeyError("The dataset must contain a 'target' column")

    X = df.drop('target', axis=1)
    y = df['target']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardize the features; fit on the training split only so no
    # information from the test split leaks into the scaling parameters.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = model_factories[model_choice](random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Classification report
    report = classification_report(y_test, y_pred)

    # Confusion matrix plot: explicit figure, closed after rendering, so
    # repeated runs do not accumulate open matplotlib figures.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {model_choice}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)  # Display confusion matrix plot in Streamlit
    plt.close(fig)

    return report, buffer, missing_values
|
75 |
+
|
76 |
+
# Streamlit app
|
77 |
+
st.title("Heart Disease Model Training")

# File upload
file = st.file_uploader("Upload CSV File", type=["csv"])

if file is not None:
    # Model selection
    model_choice = st.selectbox(
        "Choose a Model",
        ['Logistic Regression', 'Decision Tree', 'Random Forest'],
    )

    # Training only happens on an explicit click, not on every rerun.
    if st.button("Run Model"):
        # Train and evaluate the model
        report, buffer, missing_values = train_and_evaluate_model(file, model_choice)

        # Display the results
        st.subheader("Model Results and Data Info")
        st.text(f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}")

        # The dataset summary in `buffer` was previously computed and then
        # dropped; show it so the "Data Info" part of the heading is backed
        # by content. Empty entries are skipped rather than printed.
        data_info = "\n\n".join(str(part) for part in buffer if part is not None)
        if data_info:
            st.text(f"Data Info:\n{data_info}")
|