Rozeeeee committed on
Commit
0ab2276
·
verified ·
1 Parent(s): ee60f4a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import streamlit as st
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.linear_model import LogisticRegression
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.ensemble import RandomForestClassifier
11
+ from sklearn.metrics import confusion_matrix, classification_report
12
+ from io import BytesIO
13
+ from PIL import Image
14
+
15
+ # Function for loading and processing the dataset
16
def load_and_process_data(file):
    """Load a CSV dataset, draw its correlation heatmap, and summarise it.

    Args:
        file: Anything ``pandas.read_csv`` accepts, e.g. a path or the
            file-like object returned by ``st.file_uploader``.

    Returns:
        A 3-tuple ``(df, buffer, missing_values)``:

        * ``df`` -- the loaded DataFrame.
        * ``buffer`` -- a two-item list with the text of ``df.info()`` and
          the text of ``df.describe()``.
        * ``missing_values`` -- the per-column null counts as a string.
    """
    # Local import: the module header only imports BytesIO from io.
    from io import StringIO

    df = pd.read_csv(file)

    # DataFrame.info() writes to a stream and returns None, so its output
    # has to be captured explicitly rather than taken from the return value.
    info_stream = StringIO()
    df.info(buf=info_stream)
    buffer = [info_stream.getvalue(), df.describe().to_string()]

    # Correlation is only defined for numeric data; selecting those columns
    # up front keeps text columns from raising inside corr().
    corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()
    if corr_matrix.empty:
        st.warning("No numeric columns available for a correlation matrix.")
    else:
        # Draw on an explicit figure and hand that figure to Streamlit
        # instead of the global pyplot module.
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Matrix')
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so open figures do not pile up in memory.
        plt.close(fig)

    # Per-column count of missing values.
    missing_values = df.isnull().sum()

    return df, buffer, missing_values.to_string()
35
+
36
+ # Function for training and evaluating models
37
def train_and_evaluate_model(file, model_choice):
    """Train the chosen classifier on the dataset and report its performance.

    The data is loaded through ``load_and_process_data``, split 80/20 with a
    fixed seed, standardised, and used to fit the selected model. The
    confusion matrix for the held-out split is rendered in Streamlit.

    Args:
        file: CSV source forwarded to ``load_and_process_data``.
        model_choice: One of ``'Logistic Regression'``, ``'Decision Tree'``
            or ``'Random Forest'``.

    Returns:
        A 3-tuple ``(report, buffer, missing_values)``: the text
        classification report for the test split, followed by the data
        summaries and missing-value text returned by
        ``load_and_process_data``.

    Raises:
        KeyError: If the data has no ``target`` column, or ``model_choice``
            is not one of the supported model names.
    """
    df, buffer, missing_values = load_and_process_data(file)

    # Fail with a readable message instead of the opaque error from drop().
    if 'target' not in df.columns:
        raise KeyError("The dataset must contain a 'target' column.")

    X = df.drop('target', axis=1)
    y = df['target']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardise using statistics from the training split only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Map names to estimator classes so that only the requested model is
    # actually constructed.
    model_classes = {
        'Logistic Regression': LogisticRegression,
        'Decision Tree': DecisionTreeClassifier,
        'Random Forest': RandomForestClassifier,
    }
    model = model_classes[model_choice](random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Classification report
    report = classification_report(y_test, y_pred)

    # Use the classes the model was fitted on so the matrix rows/columns and
    # the tick labels refer to the same, explicitly named classes.
    class_labels = model.classes_
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    # Draw on an explicit figure and pass that figure to Streamlit rather
    # than the global pyplot module.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=ax,
    )
    ax.set_title(f'Confusion Matrix - {model_choice}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    # Close the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

    return report, buffer, missing_values
75
+
76
+ # Streamlit app
77
st.title("Heart Disease Model Training")

# Ask the user for the dataset as a CSV upload.
file = st.file_uploader("Upload CSV File", type=["csv"])

if file is not None:
    # Classifier names understood by train_and_evaluate_model.
    model_options = ['Logistic Regression', 'Decision Tree', 'Random Forest']
    model_choice = st.selectbox("Choose a Model", model_options)

    # Training only starts once the user presses the button.
    run_requested = st.button("Run Model")
    if run_requested:
        results = train_and_evaluate_model(file, model_choice)
        report, buffer, missing_values = results

        # Show the chosen model, its report, and the missing-value summary.
        st.subheader("Model Results and Data Info")
        summary = f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}"
        st.text(summary)