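# Streamlit app: computes feature importances for the heart-disease dataset
# with Random Forest, XGBoost and CART, then exports them to Excel.
# Assumed dependencies: streamlit, pandas, scikit-learn, xgboost, and an
# Excel writer engine such as openpyxl (used by DataFrame.to_excel).
# Run with: streamlit run <this_file>.py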
import streamlit as st
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Function to process data and return feature importances
def calculate_importances(file):
    # Read uploaded file
    heart_df = pd.read_csv(file)
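    # The uploaded CSV is expected to contain a 'target' label column
    # (as in the standard heart.csv dataset); all remaining columns are features.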
    
    # Set X and y
    X = heart_df.drop('target', axis=1)
    y = heart_df['target']
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
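    # Note: X_test and y_test are held out but not used below; the models are
    # fitted on the training split only to extract feature importances.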
    
    # Initialize models (use_label_encoder is deprecated in recent xgboost releases)
    rf_model = RandomForestClassifier(random_state=42)
    xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
    cart_model = DecisionTreeClassifier(random_state=42)
    
    # Train models
    rf_model.fit(X_train, y_train)
    xgb_model.fit(X_train, y_train)
    cart_model.fit(X_train, y_train)
    
    # Get feature importances
    rf_importances = rf_model.feature_importances_
    xgb_importances = xgb_model.feature_importances_
    cart_importances = cart_model.feature_importances_
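    # Each importance array is aligned with X.columns. The scikit-learn tree
    # models report normalized impurity-based importances; XGBoost's
    # feature_importances_ depends on its importance_type (typically gain).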
    
    feature_names = X.columns
    
    # Prepare DataFrame
    rf_importance = {'Feature': feature_names, 'Random Forest': rf_importances}
    xgb_importance = {'Feature': feature_names, 'XGBoost': xgb_importances}
    cart_importance = {'Feature': feature_names, 'CART': cart_importances}
    
    # Create DataFrames
    rf_df = pd.DataFrame(rf_importance)
    xgb_df = pd.DataFrame(xgb_importance)
    cart_df = pd.DataFrame(cart_importance)
    
    # Merge DataFrames
    importance_df = rf_df.merge(xgb_df, on='Feature').merge(cart_df, on='Feature')
    
    # Save to Excel
    file_name = 'feature_importances.xlsx'
    importance_df.to_excel(file_name, index=False)
    
    return file_name, importance_df.head()

# Streamlit interface
st.title("Feature Importance Calculation")

# File upload
uploaded_file = st.file_uploader("Upload heart.csv file", type=['csv'])

if uploaded_file is not None:
    # Process the file and get results
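    # Streamlit reruns the script on every interaction; wrapping
    # calculate_importances with st.cache_data would avoid retraining
    # the models each time (optional).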
    excel_file, preview_df = calculate_importances(uploaded_file)
    
    # Display a preview of the DataFrame
    st.write("Feature Importances (Preview):")
    st.dataframe(preview_df)
    
    # Provide a download button for the generated Excel file
    with open(excel_file, "rb") as file:
        st.download_button(
            label="Download Excel File",
            data=file,
            file_name=excel_file,
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )