Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix | |
import plotly.express as px | |
import plotly.graph_objects as go | |
def load_data(
    train_path='exported_named_train_good.csv',
    test_path='exported_named_test_good.csv',
    target='Target',
):
    """Load the train and test CSV files and split features from the label.

    Args:
        train_path: Path of the training CSV. Defaults to the file name the
            dashboard reads when called without arguments.
        test_path: Path of the test CSV.
        target: Name of the label column expected in both files.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, feature_names)`` where
        ``feature_names`` is the column index of ``X_train``.

    Raises:
        KeyError: If ``target`` is missing from either file.
    """
    train_frame = pd.read_csv(train_path)
    test_frame = pd.read_csv(test_path)

    # Fail early with the offending file instead of a bare pandas KeyError.
    for path, frame in ((train_path, train_frame), (test_path, test_frame)):
        if target not in frame.columns:
            raise KeyError(f"column {target!r} not found in {path!r}")

    X_train = train_frame.drop(target, axis=1)
    y_train = train_frame[target]
    X_test = test_frame.drop(target, axis=1)
    y_test = test_frame[target]
    return X_train, y_train, X_test, y_test, X_train.columns
def _score_split(model, X, y):
    """Return the metric dict of one fitted classifier on one data split."""
    predicted = model.predict(X)
    # ROC AUC ranks samples by score, so it is computed from the
    # positive-class probability rather than from hard 0/1 predictions.
    # Assumes a binary target (precision/recall below already rely on the
    # default binary averaging) -- TODO confirm against the dataset.
    positive_score = model.predict_proba(X)[:, 1]
    return {
        'accuracy': accuracy_score(y, predicted),
        # NOTE(review): f1 is weighted while precision/recall use the binary
        # default; kept as in the original, confirm this mix is intended.
        'f1': f1_score(y, predicted, average='weighted'),
        'precision': precision_score(y, predicted),
        'recall': recall_score(y, predicted),
        'roc_auc': roc_auc_score(y, positive_score),
    }


def train_models(X_train, y_train, X_test, y_test):
    """Fit the four dashboard classifiers and score them on both splits.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        A dict keyed by model display name. Each value holds the fitted
        estimator under ``'model'`` plus ``'train_metrics'`` and
        ``'test_metrics'`` dicts with the keys ``accuracy``, ``f1``,
        ``precision``, ``recall`` and ``roc_auc``.
    """
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boost": GradientBoostingClassifier(random_state=42),
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        results[name] = {
            'model': model,
            'train_metrics': _score_split(model, X_train, y_train),
            'test_metrics': _score_split(model, X_test, y_test),
        }
    return results
def plot_model_performance(results):
    """Plot train and test scores of every model as two side-by-side bar charts.

    Args:
        results: Mapping of model name to a dict holding ``'train_metrics'``
            and ``'test_metrics'`` score dicts.

    Returns:
        The Matplotlib figure containing both panels.
    """
    metric_names = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
    panels = (
        ('train_metrics', 'Training Performance'),
        ('test_metrics', 'Test Performance'),
    )

    figure, axes = plt.subplots(1, 2, figsize=(15, 6))
    for axis, (split_key, panel_title) in zip(axes, panels):
        # One column per model, one row per metric.
        scores = pd.DataFrame(
            {
                name: [entry[split_key][m] for m in metric_names]
                for name, entry in results.items()
            },
            index=metric_names,
        )
        scores.plot(kind='bar', ax=axis, title=panel_title)
        axis.set_ylim(0, 1)

    plt.tight_layout()
    return figure
def plot_feature_importance(model, feature_names, model_type):
    """Draw a horizontal bar chart of feature importances, smallest first.

    Args:
        model: Fitted estimator. Tree-based types are read through
            ``feature_importances_``; logistic regression through the
            absolute value of ``coef_[0]``.
        feature_names: Names matching the importance values one-to-one.
        model_type: One of ``"Decision Tree"``, ``"Random Forest"``,
            ``"Gradient Boost"`` or ``"Logistic Regression"``.

    Returns:
        The Matplotlib figure holding the chart.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Resolve the importance vector before creating a figure so an
    # unsupported type neither leaks an empty figure nor hits an unbound name.
    if model_type in ("Decision Tree", "Random Forest", "Gradient Boost"):
        importance = model.feature_importances_
    elif model_type == "Logistic Regression":
        importance = np.abs(model.coef_[0])
    else:
        raise ValueError(
            f"Unsupported model type for feature importance: {model_type!r}"
        )

    ranked = pd.DataFrame({
        'feature': feature_names,
        'importance': importance,
    }).sort_values('importance', ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(ranked['feature'], ranked['importance'])
    ax.set_title(f"Feature Importance - {model_type}")
    return fig
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score | |
import seaborn as sns | |
# Page configuration: wide layout with a fixed browser-tab title.
st.set_page_config(page_title="ML Dashboard", layout="wide")

# Custom stylesheet injected into the page as raw HTML.
# NOTE(review): the `css-...` selectors look like auto-generated class names;
# presumably they are tied to one Streamlit release -- confirm they still match.
_CUSTOM_CSS = """
<style>
/* Cartes stylisées */
div.css-1r6slb0.e1tzin5v2 {
background-color: #FFFFFF;
border: 1px solid #EEEEEE;
padding: 1.5rem;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
/* Headers */
.main-header {
font-size: 2rem;
font-weight: 700;
color: #1E88E5;
text-align: center;
margin-bottom: 2rem;
}
/* Metric containers */
div.css-12w0qpk.e1tzin5v2 {
background-color: #F8F9FA;
padding: 1rem;
border-radius: 8px;
text-align: center;
}
/* Metric values */
div.css-1xarl3l.e16fv1kl1 {
font-size: 1.8rem;
font-weight: 700;
color: #1E88E5;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def plot_performance_comparison(results, metric='test_metrics'):
    """Build a grouped bar chart comparing every model on one data split.

    Args:
        results: Mapping of model name to a dict that holds the score dict
            selected by ``metric``.
        metric: Key of the score dict to plot, ``'test_metrics'`` or
            ``'train_metrics'``.

    Returns:
        The Matplotlib figure with one bar group per metric and one bar per
        model.
    """
    metrics = ['accuracy', 'f1', 'recall', 'roc_auc']
    # One distinct color per model; reused in order when there are more
    # models than colors instead of raising IndexError.
    palette = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
    model_names = list(results)

    # Up to four models keep the 0.2-wide bars; beyond that the bars shrink
    # so a group never spills into the neighbouring tick.
    width = min(0.2, 0.8 / max(len(model_names), 1))
    x = np.arange(len(metrics))

    fig, ax = plt.subplots(figsize=(10, 6))
    for i, name in enumerate(model_names):
        values = [results[name][metric][m] for m in metrics]
        ax.bar(x + i * width, values, width, label=name,
               color=palette[i % len(palette)])

    ax.set_ylabel('Score')
    ax.set_title(f'Comparaison des performances ({metric.split("_")[0].title()})')
    # Center each tick under its group of bars.
    ax.set_xticks(x + width * (len(model_names) - 1) / 2)
    ax.set_xticklabels(metrics)
    if model_names:
        ax.legend()
    ax.grid(True, alpha=0.3)
    # Set the limit on this axes explicitly rather than on pyplot's
    # "current" axes.
    ax.set_ylim(0, 1)
    return fig
def create_metric_card(title, value):
    """Render a styled metric card: ``title`` above ``value``.

    Args:
        title: Heading text placed in the card.
        value: Number shown in the card, formatted with three decimals.
    """
    card_html = f"""
<div style="
background-color: white;
padding: 1rem;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
text-align: center;
margin-bottom: 1rem;
">
<h3 style="color: #666; font-size: 1rem; margin-bottom: 0.5rem;">{title}</h3>
<p style="color: #1E88E5; font-size: 1.8rem; font-weight: bold; margin: 0;">{value:.3f}</p>
</div>
"""
    st.markdown(card_html, unsafe_allow_html=True)
def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then release it."""
    st.pyplot(fig)
    # pyplot keeps figures it created registered until they are closed;
    # without this every rerun of the script would add more open figures.
    plt.close(fig)


def app():
    """Render the dashboard: model comparison, metric cards and detail plots.

    Loads the data, trains the models once per session (the results are kept
    in ``st.session_state.model_results``), then lays out the comparison
    charts, the test metrics of the model picked in the sidebar, its feature
    importances and the correlation matrix of the training features.
    """
    # Header
    st.markdown('<h1 class="main-header">Tableau de Bord ML</h1>', unsafe_allow_html=True)

    # Load and prepare the data
    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Sidebar: model selection
    with st.sidebar:
        st.markdown('<h2 style="color: #1E88E5;">Configuration</h2>', unsafe_allow_html=True)
        selected_model = st.selectbox(
            "Sélectionner un modèle",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

    # Train the models only if this session has not done so yet
    if 'model_results' not in st.session_state:
        with st.spinner("⏳ Entraînement des modèles..."):
            st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
    model_results = st.session_state.model_results

    # Main layout
    col1, col2 = st.columns([2, 1])

    with col1:
        # Performance charts
        st.markdown("### 📊 Comparaison des Performances")
        tab1, tab2 = st.tabs(["🎯 Test", "📈 Entraînement"])
        with tab1:
            _show_figure(plot_performance_comparison(model_results, 'test_metrics'))
        with tab2:
            _show_figure(plot_performance_comparison(model_results, 'train_metrics'))

    with col2:
        # Detailed test metrics of the selected model
        st.markdown(f"### 📌 Métriques - {selected_model}")
        metrics = model_results[selected_model]['test_metrics']
        for metric, value in metrics.items():
            if metric != 'precision':  # precision is deliberately left out
                create_metric_card(metric.upper(), value)

    # Lower section
    st.markdown("### 🔍 Analyse Détaillée")
    col3, col4 = st.columns(2)

    with col3:
        # Feature importance of the selected model
        current_model = model_results[selected_model]['model']
        if hasattr(current_model, 'feature_importances_') or hasattr(current_model, 'coef_'):
            if hasattr(current_model, 'feature_importances_'):
                importances = current_model.feature_importances_
            else:
                importances = np.abs(current_model.coef_[0])
            fig_importance, ax_importance = plt.subplots(figsize=(10, 6))
            ax_importance.barh(feature_names, importances)
            ax_importance.set_title("Importance des Caractéristiques")
            _show_figure(fig_importance)

    with col4:
        # Correlation matrix of the training features
        fig_corr, ax_corr = plt.subplots(figsize=(10, 8))
        sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0, ax=ax_corr)
        ax_corr.set_title("Matrice de Corrélation")
        _show_figure(fig_corr)


if __name__ == "__main__":
    app()