Travel.Com / app.py
analist's picture
Update app.py
ee9aa01 verified
raw
history blame
10.9 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree, export_text
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import shap
def load_data():
    """Load the training and test tables and split each into features and target.

    Reads ``exported_named_train_good.csv`` and ``exported_named_test_good.csv``
    using relative paths; both files must contain a ``Target`` column.

    Returns:
        A 5-tuple ``(X_train, y_train, X_test, y_test, feature_names)`` where
        ``feature_names`` is the column index of ``X_train``.
    """
    target_col = "Target"
    train_frame = pd.read_csv('exported_named_train_good.csv')
    test_frame = pd.read_csv('exported_named_test_good.csv')

    # Everything except the target column is treated as a feature.
    train_features = train_frame.drop(target_col, axis=1)
    test_features = test_frame.drop(target_col, axis=1)

    return (
        train_features,
        train_frame[target_col],
        test_features,
        test_frame[target_col],
        train_features.columns,
    )
def _evaluate_split(model, X, y):
    """Compute the reported metrics for one fitted model on one data split."""
    predicted = model.predict(X)
    # ROC AUC needs a continuous score; feeding it hard class labels reduces the
    # curve to a single threshold. Column 1 of predict_proba is the class with
    # the greater label, which is what roc_auc_score expects for binary targets.
    positive_scores = model.predict_proba(X)[:, 1]
    return {
        'accuracy': accuracy_score(y, predicted),
        # NOTE: f1 is weighted across classes while precision/recall use the
        # default (binary) averaging; kept as in the original report.
        'f1': f1_score(y, predicted, average='weighted'),
        'precision': precision_score(y, predicted),
        'recall': recall_score(y, predicted),
        'roc_auc': roc_auc_score(y, positive_scores),
    }


def train_models(X_train, y_train, X_test, y_test):
    """Fit the four classifiers and score each on the training and test data.

    The target is assumed to be binary: precision and recall are computed with
    scikit-learn's default binary averaging.

    Args:
        X_train: Training feature table.
        y_train: Training target values.
        X_test: Test feature table.
        y_test: Test target values.

    Returns:
        A dict keyed by model display name. Each value is a dict with the
        fitted estimator under ``'model'`` and metric dicts under
        ``'train_metrics'`` and ``'test_metrics'`` (keys ``accuracy``, ``f1``,
        ``precision``, ``recall``, ``roc_auc``).
    """
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boost": GradientBoostingClassifier(random_state=42),
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        results[name] = {
            'model': model,
            'train_metrics': _evaluate_split(model, X_train, y_train),
            'test_metrics': _evaluate_split(model, X_test, y_test),
        }
    return results
def plot_model_performance(results):
    """Draw side-by-side bar charts of training and test metrics for all models.

    NOTE: a second function with this same name is defined further down the
    file; that later definition is the one bound to the name once the module
    has finished loading.

    Args:
        results: Mapping of model name to a dict holding ``'train_metrics'``
            and ``'test_metrics'`` dicts.

    Returns:
        The matplotlib figure containing both panels.
    """
    metric_names = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Left panel: training scores; right panel: test scores.
    panels = (
        ('train_metrics', 'Training Performance'),
        ('test_metrics', 'Test Performance'),
    )
    for axis, (split_key, panel_title) in zip(axes, panels):
        scores = pd.DataFrame(
            {
                model_name: [entry[split_key][metric] for metric in metric_names]
                for model_name, entry in results.items()
            },
            index=metric_names,
        )
        scores.plot(kind='bar', ax=axis, title=panel_title)
        axis.set_ylim(0, 1)

    plt.tight_layout()
    return fig
def plot_feature_importance(model, feature_names, model_type):
    """Plot a horizontal bar chart of per-feature importance for a fitted model.

    Tree-based models use ``feature_importances_``; logistic regression uses
    the absolute value of the first row of ``coef_``.

    Args:
        model: Fitted estimator.
        feature_names: Feature labels, in the same order as the model's inputs.
        model_type: One of "Decision Tree", "Random Forest", "Gradient Boost"
            or "Logistic Regression".

    Returns:
        The matplotlib figure holding the chart.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Resolve the importances before touching matplotlib so an unsupported
    # model type fails with a clear error and does not leave an empty figure.
    if model_type in ("Decision Tree", "Random Forest", "Gradient Boost"):
        importance = model.feature_importances_
    elif model_type == "Logistic Regression":
        importance = np.abs(model.coef_[0])
    else:
        raise ValueError(
            f"Unsupported model type for feature importance: {model_type!r}"
        )

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importance,
    }).sort_values('importance', ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(importance_df['feature'], importance_df['importance'])
    ax.set_title(f"Feature Importance - {model_type}")
    return fig
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree, export_text
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import shap
# Page and theme configuration.
st.set_page_config(
    page_title="ML Model Interpreter",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom stylesheet: palette variables plus styling for the header, metric
# cards, sub-headers, sidebar, buttons, sliders and select boxes.
_CUSTOM_CSS = """
<style>
/* Couleurs principales */
:root {
--primary-blue: #1E88E5;
--light-blue: #90CAF9;
--dark-blue: #0D47A1;
--white: #FFFFFF;
}
/* En-tête principal */
.main-header {
color: var(--dark-blue);
text-align: center;
padding: 1rem;
background: linear-gradient(90deg, var(--white) 0%, var(--light-blue) 50%, var(--white) 100%);
border-radius: 10px;
margin-bottom: 2rem;
}
/* Carte pour les métriques */
.metric-card {
background-color: white;
padding: 1.5rem;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin-bottom: 1rem;
}
/* Style pour les sous-titres */
.sub-header {
color: var(--primary-blue);
border-bottom: 2px solid var(--light-blue);
padding-bottom: 0.5rem;
margin-bottom: 1rem;
}
/* Style pour les valeurs de métriques */
.metric-value {
font-size: 1.5rem;
font-weight: bold;
color: var(--primary-blue);
}
/* Style pour la barre latérale */
.sidebar .sidebar-content {
background-color: var(--white);
}
/* Style pour les boutons */
.stButton > button {
background-color: var(--primary-blue);
color: white;
border-radius: 5px;
border: none;
padding: 0.5rem 1rem;
}
/* Style pour les sliders */
.stSlider > div > div {
background-color: var(--light-blue);
}
/* Style pour les selectbox */
.stSelectbox > div > div {
background-color: white;
border: 1px solid var(--light-blue);
}
</style>
"""

# Inject the stylesheet as raw HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def custom_metric_card(title, value, prefix=""):
    """Build the HTML snippet for one metric card.

    Args:
        title: Heading text shown at the top of the card.
        value: Number to display; rendered with four decimal places.
        prefix: Optional text placed immediately before the number.

    Returns:
        An HTML string using the ``metric-card`` and ``metric-value`` classes.
    """
    # The leading and trailing empty entries reproduce the newline that
    # opens and closes the markup.
    card_lines = [
        "",
        '<div class="metric-card">',
        f'<h3 style="color: #1E88E5; margin-bottom: 0.5rem;">{title}</h3>',
        f'<p class="metric-value">{prefix}{value:.4f}</p>',
        "</div>",
        "",
    ]
    return "\n".join(card_lines)
def plot_with_style(fig):
    """Apply the app's visual theme to a matplotlib figure and return it.

    Activates the seaborn style sheet when one is installed, then sets a white
    figure background and, on every axes, a light-grey face, a dashed grid and
    hidden top/right spines.

    Args:
        fig: The matplotlib figure to restyle (modified in place).

    Returns:
        The same figure object.
    """
    # Matplotlib 3.6 renamed the bundled seaborn style to "seaborn-v0_8" and
    # later releases dropped the bare "seaborn" name, so requesting it
    # unconditionally raises OSError there. Use whichever name is available.
    for style_name in ('seaborn-v0_8', 'seaborn'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    fig.patch.set_facecolor('#FFFFFF')
    for ax in fig.axes:
        ax.set_facecolor('#F8F9FA')
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
    return fig
# [The load_data and train_models functions remain identical to the definitions above]
def plot_model_performance(results):
    """Draw themed side-by-side bar charts of training and test metrics.

    Args:
        results: Mapping of model name to a dict holding ``'train_metrics'``
            and ``'test_metrics'`` dicts, each keyed by ``accuracy``, ``f1``,
            ``precision``, ``recall`` and ``roc_auc``.

    Returns:
        The matplotlib figure containing both panels.
    """
    metrics = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Style configuration. Matplotlib 3.6 renamed the bundled seaborn style to
    # "seaborn-v0_8" and later releases dropped the bare "seaborn" name, so
    # requesting it unconditionally raises OSError there.
    for style_name in ('seaborn-v0_8', 'seaborn'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    colors = ['#1E88E5', '#90CAF9', '#0D47A1', '#42A5F5']

    # One panel per split: training on the left, test on the right.
    panels = (
        ('train_metrics', "Performance d'Entraînement"),
        ('test_metrics', 'Performance de Test'),
    )
    for ax, (split_key, title) in zip(axes, panels):
        split_df = pd.DataFrame(
            {
                model: [scores[split_key][metric] for metric in metrics]
                for model, scores in results.items()
            },
            index=metrics,
        )
        split_df.plot(kind='bar', ax=ax, title=title, color=colors)
        ax.set_ylim(0, 1)

    # Chart styling.
    for ax in axes:
        ax.set_facecolor('#F8F9FA')
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    plt.tight_layout()
    return fig
def app():
    """Render the Streamlit page: header, sidebar navigation and the selected section.

    Models are trained once and kept in ``st.session_state.model_results``;
    later runs reuse the stored results.
    """
    # Main header with custom styling.
    st.markdown('<h1 class="main-header">Interpréteur de Modèles ML</h1>', unsafe_allow_html=True)

    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Train only when no results are stored in the session yet.
    if 'model_results' not in st.session_state:
        with st.spinner("🔄 Entraînement des modèles en cours..."):
            st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
    all_results = st.session_state.model_results

    # Sidebar: model picker followed by section picker.
    with st.sidebar:
        st.markdown('<h2 style="color: #1E88E5;">Navigation</h2>', unsafe_allow_html=True)
        selected_model = st.selectbox(
            "📊 Sélectionnez un modèle",
            list(all_results.keys())
        )
        st.markdown('<hr style="margin: 1rem 0;">', unsafe_allow_html=True)
        section_names = [
            "Performance des modèles",
            "Interprétation du modèle",
            "Analyse des caractéristiques",
            "Simulateur de prédictions",
        ]
        page = st.radio("📑 Sélectionnez une section", section_names)

    selected_results = all_results[selected_model]
    current_model = selected_results['model']

    # Main content container.
    with st.container():
        if page == "Performance des modèles":
            st.markdown('<h2 class="sub-header">Performance des modèles</h2>', unsafe_allow_html=True)

            # Performance charts for every model.
            performance_fig = plot_model_performance(all_results)
            st.pyplot(plot_with_style(performance_fig))

            # Detailed metric cards for the selected model, one column per split.
            st.markdown('<h3 class="sub-header">Métriques détaillées</h3>', unsafe_allow_html=True)
            column_specs = (
                ('<h4 style="color: #1E88E5;">Entraînement</h4>', 'train_metrics'),
                ('<h4 style="color: #1E88E5;">Test</h4>', 'test_metrics'),
            )
            for column, (heading_html, split_key) in zip(st.columns(2), column_specs):
                with column:
                    st.markdown(heading_html, unsafe_allow_html=True)
                    for metric_name, metric_value in selected_results[split_key].items():
                        card_html = custom_metric_card(metric_name.capitalize(), metric_value)
                        st.markdown(card_html, unsafe_allow_html=True)
        # [The remaining sections, with adapted styling...]
# Entry point: build the page when this module is executed as the main program.
if __name__ == "__main__":
    app()