Spaces:

Rozeeeee
/

oooooo

Sleeping

App Files Files Community

Rozeeeee committed on Sep 26, 2024

Commit

0ab2276

verified ·

1 Parent(s): ee60f4a

Create app.py

Browse files

Files changed (1) hide show

app.py +92 -0

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import streamlit as st
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import confusion_matrix, classification_report
+from io import BytesIO
+from PIL import Image
+# Function for loading and processing the dataset
+def load_and_process_data(file):
+    df = pd.read_csv(file)  # Streamlit handles file reading directly
+    # Display basic information
+    buffer = []
+    buffer.append(df.info())
+    buffer.append(df.describe().to_string())
+    # Correlation matrix
+    plt.figure(figsize=(12, 10))
+    corr_matrix = df.corr()
+    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm')
+    plt.title('Correlation Matrix')
+    st.pyplot(plt)  # Display the correlation matrix directly in Streamlit
+    # Check for missing values
+    missing_values = df.isnull().sum()
+    return df, buffer, missing_values.to_string()
+# Function for training and evaluating models
+def train_and_evaluate_model(file, model_choice):
+    df, buffer, missing_values = load_and_process_data(file)
+    X = df.drop('target', axis=1)
+    y = df['target']
+    # Train-test split
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Standardize the features
+    scaler = StandardScaler()
+    X_train_scaled = scaler.fit_transform(X_train)
+    X_test_scaled = scaler.transform(X_test)
+    # Define models
+    models = {
+        'Logistic Regression': LogisticRegression(random_state=42),
+        'Decision Tree': DecisionTreeClassifier(random_state=42),
+        'Random Forest': RandomForestClassifier(random_state=42)
+    }
+    model = models[model_choice]
+    model.fit(X_train_scaled, y_train)
+    y_pred = model.predict(X_test_scaled)
+    # Classification report
+    report = classification_report(y_test, y_pred)
+    # Confusion matrix plot
+    cm = confusion_matrix(y_test, y_pred)
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+    plt.title(f'Confusion Matrix - {model_choice}')
+    plt.xlabel('Predicted')
+    plt.ylabel('Actual')
+    st.pyplot(plt)  # Display confusion matrix plot in Streamlit
+    return report, buffer, missing_values
+# Streamlit app
+st.title("Heart Disease Model Training")
+# File upload
+file = st.file_uploader("Upload CSV File", type=["csv"])
+if file is not None:
+    # Model selection
+    model_choice = st.selectbox("Choose a Model", ['Logistic Regression', 'Decision Tree', 'Random Forest'])
+    if st.button("Run Model"):
+        # Train and evaluate the model
+        report, buffer, missing_values = train_and_evaluate_model(file, model_choice)
+        # Display the results
+        st.subheader("Model Results and Data Info")
+        st.text(f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}")