# Spaces: Sleeping  (page-header text captured with the source; not part of the program)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import plotly.express as px
import plotly.graph_objects as go
# Page configuration: wide layout and browser-tab title.
st.set_page_config(layout="wide", page_title="ML Dashboard")
# Load a dataset from CSV.
def load_data(file):
    """Read a CSV source into a DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts; in this file it is called
            with a path string and with a Streamlit uploaded file.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file)
# Train one of the supported classifiers.
def train_model(X_train, y_train, model_name):
    """Fit and return the classifier registered under *model_name*.

    Args:
        X_train: Feature matrix passed to the estimator's ``fit``.
        y_train: Target values passed to the estimator's ``fit``.
        model_name: One of "Logistic Regression", "Decision Tree",
            "Random Forest" or "Gradient Boost".

    Returns:
        The fitted estimator, built with its default hyperparameters.

    Raises:
        KeyError: If *model_name* is not one of the supported names.
    """
    # Map names to classes (not instances) so only the requested estimator
    # is constructed on each call.
    model_classes = {
        "Logistic Regression": LogisticRegression,
        "Decision Tree": DecisionTreeClassifier,
        "Random Forest": RandomForestClassifier,
        "Gradient Boost": GradientBoostingClassifier,
    }
    model = model_classes[model_name]()
    model.fit(X_train, y_train)
    return model
# Name of the dataset column holding the label to predict.
_TARGET_COLUMN = "Target"

# Model names offered in the select boxes; each is a key accepted by train_model.
_MODEL_NAMES = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]


def _get_features_and_target():
    """Return ``(data, X, y)`` from the session dataset, or ``None`` if unusable.

    Shows a message in the page when no dataset is loaded or when the target
    column is missing, so callers only need to stop rendering on ``None``.
    """
    data = st.session_state.data
    if data is None:
        st.info("Aucune donnée chargée : utilisez le chargement de fichier CSV de la vue d'ensemble.")
        return None
    if _TARGET_COLUMN not in data.columns:
        st.error(f"La colonne '{_TARGET_COLUMN}' est absente des données chargées.")
        return None
    return data, data.drop(_TARGET_COLUMN, axis=1), data[_TARGET_COLUMN]


def _show_metrics(metrics):
    """Display ``(label, value)`` pairs as percentages, one column per metric."""
    for column, (label, value) in zip(st.columns(len(metrics)), metrics):
        with column:
            st.metric(label, f"{value:.2%}")


def _render_overview():
    """Render the overview page: upload, model choice, metrics and charts."""
    st.title("Tableau de bord ML")

    upload_col, model_col = st.columns([2, 1])
    with upload_col:
        # An uploaded file replaces the dataset kept in the session.
        uploaded_file = st.file_uploader("Charger de nouvelles données", type=['csv'])
        if uploaded_file is not None:
            st.session_state.data = load_data(uploaded_file)
    with model_col:
        model_name = st.selectbox("Sélectionner un modèle", _MODEL_NAMES)

    loaded = _get_features_and_target()
    if loaded is None:
        return
    _, X, y = loaded

    # NOTE: the model is fitted and scored on the same data, so these
    # figures describe the training fit, not generalization.
    model = train_model(X, y, model_name)
    y_pred = model.predict(X)
    # ROC AUC must be computed from scores, not hard labels; for a binary
    # target, column 1 is the probability of model.classes_[1].
    y_score = model.predict_proba(X)[:, 1]

    _show_metrics([
        ("Accuracy", accuracy_score(y, y_pred)),
        ("Precision", precision_score(y, y_pred)),
        ("Recall", recall_score(y, y_pred)),
        ("F1-Score", f1_score(y, y_pred)),
        ("ROC AUC", roc_auc_score(y, y_score)),
    ])

    importance_col, confusion_col = st.columns(2)
    with importance_col:
        st.subheader("Importance des features")
        # Only some estimators expose feature_importances_; others show no chart.
        if hasattr(model, 'feature_importances_'):
            importances = pd.DataFrame({
                'feature': X.columns,
                'importance': model.feature_importances_,
            }).sort_values('importance', ascending=True)
            fig = px.bar(importances, x='importance', y='feature', orientation='h')
            st.plotly_chart(fig, use_container_width=True)
    with confusion_col:
        st.subheader("Matrice de confusion")
        cm = confusion_matrix(y, y_pred)
        # text_auto=True is the documented way to print cell values on
        # px.imshow; `text` is not one of its parameters.
        fig = px.imshow(cm, labels=dict(x="Prédit", y="Réel"), text_auto=True)
        st.plotly_chart(fig, use_container_width=True)


def _render_prediction():
    """Render the prediction page: one input per feature, then class probabilities."""
    st.title("Prédiction")

    loaded = _get_features_and_target()
    if loaded is None:
        return
    _, X, y = loaded

    st.subheader("Entrer les valeurs pour la prédiction")
    input_values = {}
    cols = st.columns(3)
    for idx, feature in enumerate(X.columns):
        # Spread the inputs over three columns, row by row.
        with cols[idx % 3]:
            if X[feature].dtype == 'object':
                input_values[feature] = st.selectbox(
                    f"{feature}",
                    options=X[feature].unique()
                )
            else:
                input_values[feature] = st.number_input(
                    f"{feature}",
                    value=float(X[feature].mean())
                )

    if st.button("Prédire"):
        model = train_model(X, y, "Random Forest")
        pred = model.predict_proba(pd.DataFrame([input_values]))
        st.subheader("Résultat de la prédiction")
        # Label the bars with the classes the model actually learned, in the
        # same order as the predict_proba columns.
        proba_df = pd.DataFrame({
            'Classe': [str(label) for label in model.classes_],
            'Probabilité': pred[0]
        })
        fig = px.bar(proba_df, x='Classe', y='Probabilité')
        st.plotly_chart(fig)


def _render_interpretation():
    """Render the interpretation page: scatter plot of two features coloured by target."""
    st.title("Interprétation du modèle")

    loaded = _get_features_and_target()
    if loaded is None:
        return
    data, X, _ = loaded

    st.subheader("Analyse des features")
    feature_1 = st.selectbox("Sélectionner la première feature", X.columns)
    feature_2 = st.selectbox("Sélectionner la deuxième feature", X.columns)
    fig = px.scatter(data,
                     x=feature_1,
                     y=feature_2,
                     color='Target',
                     title=f"Relation entre {feature_1} et {feature_2}")
    st.plotly_chart(fig)


def _render_training():
    """Render the training page: model and hyperparameter choice, fit, metrics."""
    st.title("Entraînement du modèle")

    loaded = _get_features_and_target()
    if loaded is None:
        return
    _, X, y = loaded

    model_name = st.selectbox("Sélectionner le modèle à entraîner", _MODEL_NAMES)

    st.subheader("Paramètres du modèle")
    rf_params = {}
    if model_name == "Random Forest":
        rf_params["n_estimators"] = st.slider("Nombre d'arbres", 10, 200, 100)
        rf_params["max_depth"] = st.slider("Profondeur maximale", 1, 20, 10)

    if st.button("Entraîner le modèle"):
        with st.spinner("Entraînement en cours..."):
            if rf_params:
                # Apply the slider values; train_model only builds estimators
                # with default hyperparameters.
                model = RandomForestClassifier(**rf_params)
                model.fit(X, y)
            else:
                model = train_model(X, y, model_name)
        st.success("Modèle entraîné avec succès!")

        # Metrics are computed on the training data itself.
        y_pred = model.predict(X)
        _show_metrics([
            ("Accuracy", accuracy_score(y, y_pred)),
            ("Precision", precision_score(y, y_pred)),
            ("Recall", recall_score(y, y_pred)),
        ])


def app():
    """Run the dashboard: sidebar navigation, default data loading, page dispatch.

    The train and test CSV files are loaded into ``st.session_state`` once per
    session. If loading fails, both entries are set to ``None`` and a warning
    is shown in the sidebar. The page selected in the sidebar is then rendered.
    """
    pages = {
        "📊 Vue d'ensemble": _render_overview,
        "🎯 Prédiction": _render_prediction,
        "🔍 Interprétation": _render_interpretation,
        "⚙️ Entraînement": _render_training,
    }

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("", list(pages))

    # Load the default datasets once per session.
    if 'data' not in st.session_state:
        try:
            st.session_state.data = load_data('exported_named_train_good.csv')
            st.session_state.test_data = load_data('exported_named_test_good.csv')
        except (OSError, ValueError) as exc:
            # OSError covers missing or unreadable files; pandas parsing and
            # decoding errors derive from ValueError. Anything else propagates.
            st.session_state.data = None
            st.session_state.test_data = None
            st.sidebar.warning(f"Impossible de charger les données par défaut : {exc}")

    pages[page]()
# Script entry point: build the dashboard when the file is executed directly.
if __name__ == '__main__':
    app()