Spaces:

Rozeeeee
/

oooooo

Sleeping

App Files Files Community

oooooo / app.py

Rozeeeee

Create app.py

0ab2276 verified 9 months ago

raw

history blame contribute delete

3.13 kB

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import streamlit as st
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.linear_model import LogisticRegression
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import confusion_matrix, classification_report
	from io import BytesIO
	from PIL import Image

	def load_and_process_data(file):
	    """Load a CSV, draw its correlation heatmap in Streamlit, and summarise it.

	    Args:
	        file: Anything ``pandas.read_csv`` accepts (a path or a file-like
	            object such as a Streamlit upload).

	    Returns:
	        A ``(df, buffer, missing_values)`` tuple: the loaded DataFrame, a
	        two-item list with the text of ``df.info()`` and ``df.describe()``,
	        and the per-column null counts rendered as a string.
	    """
	    from io import StringIO  # local import: the file header only pulls in BytesIO

	    df = pd.read_csv(file)

	    # DataFrame.info() writes to a stream and returns None, so capture the
	    # text through buf= instead of appending its return value.
	    info_stream = StringIO()
	    df.info(buf=info_stream)
	    buffer = [info_stream.getvalue(), df.describe().to_string()]

	    # Correlate numeric columns only; string columns make DataFrame.corr
	    # raise on recent pandas versions.
	    numeric_df = df.select_dtypes(include="number")
	    if numeric_df.shape[1] > 0:
	        fig, ax = plt.subplots(figsize=(12, 10))
	        sns.heatmap(numeric_df.corr(), annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
	        ax.set_title('Correlation Matrix')
	        st.pyplot(fig)  # hand Streamlit the Figure itself, not the pyplot module
	        plt.close(fig)  # otherwise every rerun leaves another figure open

	    # Per-column count of missing values
	    missing_values = df.isnull().sum()

	    return df, buffer, missing_values.to_string()

	def train_and_evaluate_model(file, model_choice):
	    """Train the chosen classifier on the uploaded data and report its quality.

	    The data is loaded through ``load_and_process_data``, split 80/20 with a
	    fixed seed, standardised with a scaler fitted on the training part only,
	    and the confusion matrix of the test predictions is drawn in Streamlit.

	    Args:
	        file: CSV source forwarded to ``load_and_process_data``.
	        model_choice: One of ``'Logistic Regression'``, ``'Decision Tree'``
	            or ``'Random Forest'``.

	    Returns:
	        A ``(report, buffer, missing_values)`` tuple: the text classification
	        report plus the data summary and missing-value text produced by
	        ``load_and_process_data``.

	    Raises:
	        KeyError: If the data has no ``'target'`` column, or ``model_choice``
	            is not one of the supported names.
	    """
	    df, buffer, missing_values = load_and_process_data(file)

	    # Fail with a readable message instead of pandas' "not found in axis".
	    if 'target' not in df.columns:
	        raise KeyError("uploaded data has no 'target' column to use as the label")

	    X = df.drop(columns='target')
	    y = df['target']

	    # Train-test split
	    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	    # Standardize the features (statistics come from the training split only)
	    scaler = StandardScaler()
	    X_train_scaled = scaler.fit_transform(X_train)
	    X_test_scaled = scaler.transform(X_test)

	    # Map names to estimator classes so only the selected model is built.
	    model_classes = {
	        'Logistic Regression': LogisticRegression,
	        'Decision Tree': DecisionTreeClassifier,
	        'Random Forest': RandomForestClassifier,
	    }

	    model = model_classes[model_choice](random_state=42)
	    model.fit(X_train_scaled, y_train)
	    y_pred = model.predict(X_test_scaled)

	    # Classification report
	    report = classification_report(y_test, y_pred)

	    # Confusion matrix plot
	    cm = confusion_matrix(y_test, y_pred)
	    fig, ax = plt.subplots(figsize=(8, 6))
	    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
	    ax.set_title(f'Confusion Matrix - {model_choice}')
	    ax.set_xlabel('Predicted')
	    ax.set_ylabel('Actual')
	    st.pyplot(fig)  # hand Streamlit the Figure itself, not the pyplot module
	    plt.close(fig)  # otherwise every rerun leaves another figure open

	    return report, buffer, missing_values

	# Streamlit app: upload a CSV, pick a classifier, then show its evaluation.
	st.title("Heart Disease Model Training")

	# The uploader yields None until the user has provided a file.
	uploaded_csv = st.file_uploader("Upload CSV File", type=["csv"])

	if uploaded_csv is not None:
	    model_options = ['Logistic Regression', 'Decision Tree', 'Random Forest']
	    model_choice = st.selectbox("Choose a Model", model_options)

	    run_requested = st.button("Run Model")
	    if run_requested:
	        # Training also renders the correlation and confusion-matrix plots.
	        report, _data_info, missing_values = train_and_evaluate_model(uploaded_csv, model_choice)

	        st.subheader("Model Results and Data Info")
	        results_text = f"Model: {model_choice}\n\nClassification Report:\n{report}\n\nMissing Values:\n{missing_values}"
	        st.text(results_text)