Travel.Com / app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree, export_text
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import shap
def load_data():
    # Pre-exported train/test splits; the "Target" column holds the labels
    data = pd.read_csv('exported_named_train_good.csv')
    data_test = pd.read_csv('exported_named_test_good.csv')

    X_train = data.drop("Target", axis=1)
    y_train = data['Target']
    X_test = data_test.drop('Target', axis=1)
    y_test = data_test['Target']

    return X_train, y_train, X_test, y_test, X_train.columns
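
# Optional sketch (not in the original app): a cached wrapper around load_data,
# assuming Streamlit >= 1.18 where st.cache_data is available. The name
# load_data_cached is hypothetical; it only avoids re-reading the CSVs on reruns.
@st.cache_data
def load_data_cached():
    return load_data()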
def train_models(X_train, y_train, X_test, y_test):
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boost": GradientBoostingClassifier(random_state=42)
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)

        # Predictions: hard labels for the threshold metrics,
        # positive-class probabilities for ROC AUC
        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)
        y_train_proba = model.predict_proba(X_train)[:, 1]
        y_test_proba = model.predict_proba(X_test)[:, 1]

        # Metrics
        results[name] = {
            'model': model,
            'train_metrics': {
                'accuracy': accuracy_score(y_train, y_train_pred),
                'f1': f1_score(y_train, y_train_pred, average='weighted'),
                'precision': precision_score(y_train, y_train_pred),
                'recall': recall_score(y_train, y_train_pred),
                'roc_auc': roc_auc_score(y_train, y_train_proba)
            },
            'test_metrics': {
                'accuracy': accuracy_score(y_test, y_test_pred),
                'f1': f1_score(y_test, y_test_pred, average='weighted'),
                'precision': precision_score(y_test, y_test_pred),
                'recall': recall_score(y_test, y_test_pred),
                'roc_auc': roc_auc_score(y_test, y_test_proba)
            }
        }

    return results
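
# Illustrative helper (not part of the original app): flattens the nested results
# dict produced by train_models into a DataFrame, one row per model, for quick
# inspection or export. The name results_to_dataframe is hypothetical.
def results_to_dataframe(results, split='test_metrics'):
    rows = {name: res[split] for name, res in results.items()}
    return pd.DataFrame(rows).T  # models as rows, metrics as columns

# Example usage:
#   df = results_to_dataframe(train_models(X_train, y_train, X_test, y_test))
#   print(df.round(3))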
def plot_model_performance(results):
    metrics = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Training metrics
    train_data = {model: [results[model]['train_metrics'][metric] for metric in metrics]
                  for model in results.keys()}
    train_df = pd.DataFrame(train_data, index=metrics)
    train_df.plot(kind='bar', ax=axes[0], title='Training Performance')
    axes[0].set_ylim(0, 1)

    # Test metrics
    test_data = {model: [results[model]['test_metrics'][metric] for metric in metrics]
                 for model in results.keys()}
    test_df = pd.DataFrame(test_data, index=metrics)
    test_df.plot(kind='bar', ax=axes[1], title='Test Performance')
    axes[1].set_ylim(0, 1)

    plt.tight_layout()
    return fig
def plot_feature_importance(model, feature_names, model_type):
    plt.figure(figsize=(10, 6))

    if model_type in ["Decision Tree", "Random Forest", "Gradient Boost"]:
        importance = model.feature_importances_
    elif model_type == "Logistic Regression":
        importance = np.abs(model.coef_[0])
    else:
        raise ValueError(f"Unsupported model type: {model_type}")

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importance
    }).sort_values('importance', ascending=True)

    plt.barh(importance_df['feature'], importance_df['importance'])
    plt.title(f"Feature Importance - {model_type}")
    return plt.gcf()
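
# shap is imported above but never used in the app. A minimal sketch of how it
# could be wired in for the tree-based models, assuming a binary target; the
# function name and sampling size are illustrative, not part of the original code.
def plot_shap_summary(model, X, max_samples=200):
    sample = X.sample(min(max_samples, len(X)), random_state=42)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(sample)
    # Some shap versions return one array per class for binary classifiers;
    # keep the positive class in that case.
    if isinstance(shap_values, list):
        shap_values = shap_values[1]
    shap.summary_plot(shap_values, sample, show=False)
    return plt.gcf()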
# Page configuration
st.set_page_config(layout="wide", page_title="ML Dashboard")

# Custom styling
st.markdown("""
    <style>
    /* Styled cards */
    div.css-1r6slb0.e1tzin5v2 {
        background-color: #FFFFFF;
        border: 1px solid #EEEEEE;
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }

    /* Headers */
    .main-header {
        font-size: 2rem;
        font-weight: 700;
        color: #1E88E5;
        text-align: center;
        margin-bottom: 2rem;
    }

    /* Metric containers */
    div.css-12w0qpk.e1tzin5v2 {
        background-color: #F8F9FA;
        padding: 1rem;
        border-radius: 8px;
        text-align: center;
    }

    /* Metric values */
    div.css-1xarl3l.e16fv1kl1 {
        font-size: 1.8rem;
        font-weight: 700;
        color: #1E88E5;
    }
    </style>
""", unsafe_allow_html=True)
def plot_performance_comparison(results, metric='test_metrics'):
    """Builds a performance comparison chart with a distinct color per model."""
    metrics = ['accuracy', 'f1', 'recall', 'roc_auc']
    model_names = list(results.keys())

    # Distinct colors for each model
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

    data = {model: [results[model][metric][m] for m in metrics]
            for model in model_names}

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(metrics))
    width = 0.2

    for i, (model, values) in enumerate(data.items()):
        ax.bar(x + i*width, values, width, label=model, color=colors[i])

    ax.set_ylabel('Score')
    ax.set_title(f'Performance comparison ({metric.split("_")[0].title()})')
    ax.set_xticks(x + width * (len(model_names)-1)/2)
    ax.set_xticklabels(metrics)
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.ylim(0, 1)

    return fig
def create_metric_card(title, value):
    """Renders a styled metric card."""
    st.markdown(f"""
        <div style="
            background-color: white;
            padding: 1rem;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            text-align: center;
            margin-bottom: 1rem;
        ">
            <h3 style="color: #666; font-size: 1rem; margin-bottom: 0.5rem;">{title}</h3>
            <p style="color: #1E88E5; font-size: 1.8rem; font-weight: bold; margin: 0;">{value:.3f}</p>
        </div>
    """, unsafe_allow_html=True)
def app():
    # Header
    st.markdown('<h1 class="main-header">ML Dashboard</h1>', unsafe_allow_html=True)

    # Load and prepare the data
    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Sidebar for model selection
    with st.sidebar:
        st.markdown('<h2 style="color: #1E88E5;">Configuration</h2>', unsafe_allow_html=True)
        selected_model = st.selectbox(
            "Select a model",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

    # Train the models once and keep them in the session state
    if 'model_results' not in st.session_state:
        with st.spinner("⏳ Training models..."):
            st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)

    # Main layout
    col1, col2 = st.columns([2, 1])

    with col1:
        # Performance charts
        st.markdown("### 📊 Performance Comparison")
        tab1, tab2 = st.tabs(["🎯 Test", "📈 Training"])

        with tab1:
            fig_test = plot_performance_comparison(st.session_state.model_results, 'test_metrics')
            st.pyplot(fig_test)

        with tab2:
            fig_train = plot_performance_comparison(st.session_state.model_results, 'train_metrics')
            st.pyplot(fig_train)

    with col2:
        # Detailed metrics for the selected model
        st.markdown(f"### 📌 Metrics - {selected_model}")
        metrics = st.session_state.model_results[selected_model]['test_metrics']
        for metric, value in metrics.items():
            if metric != 'precision':  # precision is left out of the cards
                create_metric_card(metric.upper(), value)

    # Lower section
    st.markdown("### 🔍 Detailed Analysis")
    col3, col4 = st.columns(2)

    with col3:
        # Feature importance
        current_model = st.session_state.model_results[selected_model]['model']
        if hasattr(current_model, 'feature_importances_') or hasattr(current_model, 'coef_'):
            fig_importance = plt.figure(figsize=(10, 6))
            if hasattr(current_model, 'feature_importances_'):
                importances = current_model.feature_importances_
            else:
                importances = np.abs(current_model.coef_[0])
            plt.barh(feature_names, importances)
            plt.title("Feature Importance")
            st.pyplot(fig_importance)

    with col4:
        # Correlation matrix
        fig_corr = plt.figure(figsize=(10, 8))
        sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0)
        plt.title("Correlation Matrix")
        st.pyplot(fig_corr)
if __name__ == "__main__":
    app()
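
# Launch locally with Streamlit's standard CLI:
#   streamlit run app.py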