analist committed on
Commit
97c2ba7
·
verified ·
1 Parent(s): ad2c4e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -176
app.py CHANGED
@@ -12,191 +12,256 @@ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_sc
12
  import plotly.express as px
13
  import plotly.graph_objects as go
14
 
15
# Page configuration: use the wide layout and set the browser-tab title.
st.set_page_config(layout="wide", page_title="ML Dashboard")
 
 
 
 
 
 
17
 
18
@st.cache_data
def load_data(file):
    """Read a CSV source into a pandas DataFrame.

    ``file`` is forwarded unchanged to ``pd.read_csv``; elsewhere in this
    file it is called with both a path string and an uploaded file object.
    The result is memoised by Streamlit through ``st.cache_data``.
    """
    return pd.read_csv(file)
23
-
24
def train_model(X_train, y_train, model_name, random_state=None):
    """Fit and return the classifier registered under ``model_name``.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        model_name: One of "Logistic Regression", "Decision Tree",
            "Random Forest" or "Gradient Boost".
        random_state: Optional seed forwarded to the estimator. The default
            ``None`` keeps the estimator's own default (unseeded) behaviour.

    Returns:
        The fitted scikit-learn estimator.

    Raises:
        KeyError: If ``model_name`` is not one of the supported names.
    """
    # Map names to classes rather than instances so that only the requested
    # estimator is ever constructed.
    model_classes = {
        "Logistic Regression": LogisticRegression,
        "Decision Tree": DecisionTreeClassifier,
        "Random Forest": RandomForestClassifier,
        "Gradient Boost": GradientBoostingClassifier,
    }
    try:
        model_class = model_classes[model_name]
    except KeyError:
        raise KeyError(
            f"Unknown model {model_name!r}; expected one of {sorted(model_classes)}"
        ) from None

    model = model_class(random_state=random_state)
    model.fit(X_train, y_train)
    return model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
_MODEL_NAMES = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]


def _load_default_data():
    """Load the bundled train/test CSVs into session state on first run.

    Both entries are set to ``None`` when the files cannot be read, so the
    pages can simply render nothing until the user uploads data.
    """
    if 'data' in st.session_state:
        return
    try:
        st.session_state.data = load_data('exported_named_train_good.csv')
        st.session_state.test_data = load_data('exported_named_test_good.csv')
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; pandas' empty-file and
        # parser errors derive from ValueError. Anything else is a real bug
        # and should surface instead of being silently swallowed.
        st.session_state.data = None
        st.session_state.test_data = None


def _current_dataset():
    """Return ``(X, y)`` from the session data, or ``None`` if it is unusable."""
    data = st.session_state.data
    if data is None:
        return None
    if "Target" not in data.columns:
        # An uploaded CSV may lack the label column; report it instead of
        # crashing with a KeyError.
        st.error("La colonne 'Target' est absente des données chargées.")
        return None
    return data.drop("Target", axis=1), data["Target"]


def _render_overview():
    """Overview page: data upload, model choice, metrics and diagnostic plots."""
    st.title("Tableau de bord ML")

    upload_col, model_col = st.columns([2, 1])
    with upload_col:
        uploaded_file = st.file_uploader("Charger de nouvelles données", type=['csv'])
        if uploaded_file is not None:
            st.session_state.data = load_data(uploaded_file)
    with model_col:
        model_name = st.selectbox("Sélectionner un modèle", _MODEL_NAMES)

    dataset = _current_dataset()
    if dataset is None:
        return
    X, y = dataset

    metric_cols = st.columns(5)

    # NOTE(review): the model is fitted and scored on the same data, so these
    # figures are training scores, not generalisation estimates.
    model = train_model(X, y, model_name)
    y_pred = model.predict(X)
    # ROC AUC needs a continuous score; hard 0/1 predictions collapse the
    # curve to a single operating point. Column 1 is the positive class
    # (binary target, as already required by the default precision/recall).
    y_score = model.predict_proba(X)[:, 1]

    scores = (
        ("Accuracy", accuracy_score(y, y_pred)),
        ("Precision", precision_score(y, y_pred)),
        ("Recall", recall_score(y, y_pred)),
        ("F1-Score", f1_score(y, y_pred)),
        ("ROC AUC", roc_auc_score(y, y_score)),
    )
    for column, (label, score) in zip(metric_cols, scores):
        with column:
            st.metric(label, f"{score:.2%}")

    importance_col, confusion_col = st.columns(2)
    with importance_col:
        st.subheader("Importance des features")
        if hasattr(model, 'feature_importances_'):
            importances = pd.DataFrame({
                'feature': X.columns,
                'importance': model.feature_importances_
            }).sort_values('importance', ascending=True)
            fig = px.bar(importances, x='importance', y='feature', orientation='h')
            st.plotly_chart(fig, use_container_width=True)

    with confusion_col:
        st.subheader("Matrice de confusion")
        cm = confusion_matrix(y, y_pred)
        # px.imshow has no ``text`` keyword; ``text_auto`` is the supported
        # way to print the cell values on the heatmap.
        fig = px.imshow(cm,
                        labels=dict(x="Prédit", y="Réel"),
                        text_auto=True)
        st.plotly_chart(fig, use_container_width=True)


def _render_prediction():
    """Prediction page: one input widget per feature, then class probabilities."""
    st.title("Prédiction")

    dataset = _current_dataset()
    if dataset is None:
        return
    X, y = dataset

    st.subheader("Entrer les valeurs pour la prédiction")

    input_values = {}
    cols = st.columns(3)
    for idx, feature in enumerate(X.columns):
        with cols[idx % 3]:
            if X[feature].dtype == 'object':
                input_values[feature] = st.selectbox(
                    f"{feature}",
                    options=X[feature].unique()
                )
            else:
                input_values[feature] = st.number_input(
                    f"{feature}",
                    value=float(X[feature].mean())
                )

    if st.button("Prédire"):
        model = train_model(X, y, "Random Forest")
        # Fix the column order explicitly so it matches the training frame.
        sample = pd.DataFrame([input_values], columns=X.columns)
        pred = model.predict_proba(sample)

        st.subheader("Résultat de la prédiction")
        proba_df = pd.DataFrame({
            # Use the labels the model actually learned rather than assuming
            # they are exactly 0 and 1.
            'Classe': [str(label) for label in model.classes_],
            'Probabilité': pred[0]
        })
        fig = px.bar(proba_df, x='Classe', y='Probabilité')
        st.plotly_chart(fig)


def _render_interpretation():
    """Interpretation page: scatter plot of two chosen features, coloured by target."""
    st.title("Interprétation du modèle")

    dataset = _current_dataset()
    if dataset is None:
        return
    X, _ = dataset

    st.subheader("Analyse des features")
    feature_1 = st.selectbox("Sélectionner la première feature", X.columns)
    feature_2 = st.selectbox("Sélectionner la deuxième feature", X.columns)

    fig = px.scatter(st.session_state.data,
                     x=feature_1,
                     y=feature_2,
                     color='Target',
                     title=f"Relation entre {feature_1} et {feature_2}")
    st.plotly_chart(fig)


def _render_training():
    """Training page: choose a model, tune it, fit it and show training scores."""
    st.title("Entraînement du modèle")

    dataset = _current_dataset()
    if dataset is None:
        return
    X, y = dataset

    model_name = st.selectbox("Sélectionner le modèle à entraîner", _MODEL_NAMES)

    st.subheader("Paramètres du modèle")
    forest_params = {}
    if model_name == "Random Forest":
        forest_params["n_estimators"] = st.slider("Nombre d'arbres", 10, 200, 100)
        forest_params["max_depth"] = st.slider("Profondeur maximale", 1, 20, 10)

    if not st.button("Entraîner le modèle"):
        return

    with st.spinner("Entraînement en cours..."):
        if forest_params:
            # train_model() takes no hyper-parameters, so the slider values
            # would be ignored; build the forest directly to honour them.
            model = RandomForestClassifier(**forest_params)
            model.fit(X, y)
        else:
            model = train_model(X, y, model_name)
    st.success("Modèle entraîné avec succès!")

    y_pred = model.predict(X)
    scores = (
        ("Accuracy", accuracy_score(y, y_pred)),
        ("Precision", precision_score(y, y_pred)),
        ("Recall", recall_score(y, y_pred)),
    )
    for column, (label, score) in zip(st.columns(3), scores):
        with column:
            st.metric(label, f"{score:.2%}")


def app():
    """Entry point: sidebar navigation that dispatches to one of four pages."""
    pages = {
        "📊 Vue d'ensemble": _render_overview,
        "🎯 Prédiction": _render_prediction,
        "🔍 Interprétation": _render_interpretation,
        "⚙️ Entraînement": _render_training,
    }

    st.sidebar.title("Navigation")
    # Streamlit discourages empty widget labels; keep a real label for
    # accessibility and hide it visually instead.
    page = st.sidebar.radio("Navigation", list(pages), label_visibility="collapsed")

    _load_default_data()
    pages[page]()


if __name__ == '__main__':
    app()
 
12
  import plotly.express as px
13
  import plotly.graph_objects as go
14
 
15
def load_data(train_path='exported_named_train_good.csv',
              test_path='exported_named_test_good.csv',
              target='Target'):
    """Load the train and test CSV files and split features from the label.

    Args:
        train_path: Path (or file-like object) of the training CSV.
        test_path: Path (or file-like object) of the test CSV.
        target: Name of the label column present in both files.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, feature_names)`` where
        ``feature_names`` is the column index of ``X_train``.

    Raises:
        KeyError: If ``target`` is missing from either file.
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # Fail with a message that says which file is at fault.
    for label, frame in (("training", train_df), ("test", test_df)):
        if target not in frame.columns:
            raise KeyError(f"Column {target!r} not found in the {label} data")

    X_train = train_df.drop(target, axis=1)
    y_train = train_df[target]
    X_test = test_df.drop(target, axis=1)
    y_test = test_df[target]
    return X_train, y_train, X_test, y_test, X_train.columns
23
 
24
def _compute_metrics(model, X, y):
    """Score a fitted binary classifier on ``(X, y)`` and return a metric dict."""
    y_pred = model.predict(X)
    # ROC AUC must be computed from a continuous score. Feeding it hard 0/1
    # predictions reduces the curve to one operating point and understates
    # the model. Column 1 of predict_proba is the positive class.
    y_score = model.predict_proba(X)[:, 1]
    return {
        'accuracy': accuracy_score(y, y_pred),
        'f1': f1_score(y, y_pred, average='weighted'),
        'precision': precision_score(y, y_pred),
        'recall': recall_score(y, y_pred),
        'roc_auc': roc_auc_score(y, y_score)
    }


def train_models(X_train, y_train, X_test, y_test):
    """Fit every supported classifier and evaluate it on both splits.

    Precision and recall use scikit-learn's default binary averaging, so a
    binary target is assumed throughout.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.

    Returns:
        A dict keyed by model name. Each value holds the fitted estimator
        under ``'model'`` plus ``'train_metrics'`` and ``'test_metrics'``
        dicts with the keys accuracy, f1, precision, recall and roc_auc.
    """
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Random Forest": RandomForestClassifier(random_state=42),
        "Gradient Boost": GradientBoostingClassifier(random_state=42)
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        results[name] = {
            'model': model,
            'train_metrics': _compute_metrics(model, X_train, y_train),
            'test_metrics': _compute_metrics(model, X_test, y_test)
        }

    return results
60
 
61
def plot_model_performance(results):
    """Draw side-by-side bar charts of training and test metrics per model.

    ``results`` maps a model name to a dict holding ``'train_metrics'`` and
    ``'test_metrics'``. The left panel shows the training scores, the right
    panel the test scores, both on a 0-1 y-axis. Returns the figure.
    """
    metric_names = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
    fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=(15, 6))

    panels = (
        (train_ax, 'train_metrics', 'Training Performance'),
        (test_ax, 'test_metrics', 'Test Performance'),
    )
    for axis, split_key, panel_title in panels:
        # One column per model, one row per metric.
        scores = pd.DataFrame(
            {
                name: [entry[split_key][m] for m in metric_names]
                for name, entry in results.items()
            },
            index=metric_names,
        )
        scores.plot(kind='bar', ax=axis, title=panel_title)
        axis.set_ylim(0, 1)

    plt.tight_layout()
    return fig
81
+
82
def plot_feature_importance(model, feature_names, model_type):
    """Plot a horizontal bar chart of feature importances, sorted ascending.

    Args:
        model: Fitted estimator. Tree-based models are read through
            ``feature_importances_``; logistic regression through the
            absolute value of the first row of ``coef_``.
        feature_names: Feature labels, aligned with the importance values.
        model_type: One of "Decision Tree", "Random Forest",
            "Gradient Boost" or "Logistic Regression".

    Returns:
        The Matplotlib figure containing the chart.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Resolve the importances before creating any figure so that an
    # unsupported model type neither leaks a figure nor fails later with an
    # unbound local variable.
    if model_type in ("Decision Tree", "Random Forest", "Gradient Boost"):
        importance = model.feature_importances_
    elif model_type == "Logistic Regression":
        importance = np.abs(model.coef_[0])
    else:
        raise ValueError(f"Unsupported model type for feature importance: {model_type!r}")

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importance
    }).sort_values('importance', ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(importance_df['feature'], importance_df['importance'])
    ax.set_title(f"Feature Importance - {model_type}")
    return fig
98
 
 
 
 
 
 
 
 
 
99
 
100
+ import streamlit as st
101
+ import pandas as pd
102
+ import numpy as np
103
+ import matplotlib.pyplot as plt
104
+ from sklearn.tree import DecisionTreeClassifier
105
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
106
+ from sklearn.linear_model import LogisticRegression
107
+ from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
108
+ import seaborn as sns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
# Page configuration: use the wide layout and set the browser-tab title.
st.set_page_config(layout="wide", page_title="ML Dashboard")

# Custom styling: inject a <style> block into the page.
# NOTE(review): the `css-…` / `e1…` selectors below look like auto-generated
# Streamlit class names; presumably they are tied to one Streamlit release and
# will stop matching after an upgrade — confirm against the deployed version.
st.markdown("""
    <style>
    /* Cartes stylisées */
    div.css-1r6slb0.e1tzin5v2 {
        background-color: #FFFFFF;
        border: 1px solid #EEEEEE;
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }

    /* Headers */
    .main-header {
        font-size: 2rem;
        font-weight: 700;
        color: #1E88E5;
        text-align: center;
        margin-bottom: 2rem;
    }

    /* Metric containers */
    div.css-12w0qpk.e1tzin5v2 {
        background-color: #F8F9FA;
        padding: 1rem;
        border-radius: 8px;
        text-align: center;
    }

    /* Metric values */
    div.css-1xarl3l.e16fv1kl1 {
        font-size: 1.8rem;
        font-weight: 700;
        color: #1E88E5;
    }
    </style>
""", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
def plot_performance_comparison(results, metric='test_metrics'):
    """Build a grouped bar chart comparing every model on four metrics.

    Args:
        results: Mapping of model name to a dict that contains the metric
            dict named by ``metric`` (with accuracy, f1, recall, roc_auc).
        metric: Which metric dict to plot, ``'test_metrics'`` or
            ``'train_metrics'``.

    Returns:
        The Matplotlib figure.
    """
    metrics = ['accuracy', 'f1', 'recall', 'roc_auc']
    model_names = list(results)

    # One distinct colour per model; cycled if there are more models than
    # colours so extra models do not raise IndexError.
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

    # Keep the 0.2 bar width for up to four models, and shrink it beyond
    # that so a group never spills into the neighbouring metric.
    width = min(0.2, 0.8 / max(len(model_names), 1))

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(metrics))

    for i, model in enumerate(model_names):
        values = [results[model][metric][m] for m in metrics]
        ax.bar(x + i * width, values, width, label=model, color=colors[i % len(colors)])

    ax.set_ylabel('Score')
    ax.set_title(f'Comparaison des performances ({metric.split("_")[0].title()})')
    # Centre each tick under its group of bars.
    ax.set_xticks(x + width * (len(model_names) - 1) / 2)
    ax.set_xticklabels(metrics)
    if model_names:
        ax.legend()
    ax.grid(True, alpha=0.3)
    # Set the limits on this Axes explicitly instead of through pyplot's
    # implicit "current axes".
    ax.set_ylim(0, 1)

    return fig
178
 
179
def create_metric_card(title, value):
    """Render one styled metric card through ``st.markdown``.

    ``title`` is shown as the card heading and ``value`` is formatted with
    three decimal places.
    """
    card_html = f"""
    <div style="
        background-color: white;
        padding: 1rem;
        border-radius: 8px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        text-align: center;
        margin-bottom: 1rem;
    ">
        <h3 style="color: #666; font-size: 1rem; margin-bottom: 0.5rem;">{title}</h3>
        <p style="color: #1E88E5; font-size: 1.8rem; font-weight: bold; margin: 0;">{value:.3f}</p>
    </div>
    """
    st.markdown(card_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then close it.

    pyplot keeps every figure it creates until it is explicitly closed, and
    Streamlit re-runs the script on each interaction, so unclosed figures
    would accumulate for the lifetime of the server process.
    """
    st.pyplot(fig)
    plt.close(fig)


def app():
    """Render the dashboard: model comparison, metric cards and detail plots."""
    # Header
    st.markdown('<h1 class="main-header">Tableau de Bord ML</h1>', unsafe_allow_html=True)

    # Load and split the data.
    X_train, y_train, X_test, y_test, feature_names = load_data()

    # Sidebar: model selection.
    with st.sidebar:
        st.markdown('<h2 style="color: #1E88E5;">Configuration</h2>', unsafe_allow_html=True)
        selected_model = st.selectbox(
            "Sélectionner un modèle",
            ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boost"]
        )

    # Train once per session; later reruns reuse the stored results.
    if 'model_results' not in st.session_state:
        with st.spinner("⏳ Entraînement des modèles..."):
            st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
    results = st.session_state.model_results

    # Main layout
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("### 📊 Comparaison des Performances")

        tab1, tab2 = st.tabs(["🎯 Test", "📈 Entraînement"])

        with tab1:
            _show_figure(plot_performance_comparison(results, 'test_metrics'))

        with tab2:
            _show_figure(plot_performance_comparison(results, 'train_metrics'))

    with col2:
        # Detailed test metrics for the selected model.
        st.markdown(f"### 📌 Métriques - {selected_model}")

        metrics = results[selected_model]['test_metrics']
        for metric, value in metrics.items():
            if metric != 'precision':  # precision is deliberately not shown
                create_metric_card(metric.upper(), value)

    # Lower section
    st.markdown("### 🔍 Analyse Détaillée")
    col3, col4 = st.columns(2)

    with col3:
        # Feature importance: tree models expose feature_importances_,
        # linear models expose coef_.
        current_model = results[selected_model]['model']
        if hasattr(current_model, 'feature_importances_') or hasattr(current_model, 'coef_'):
            if hasattr(current_model, 'feature_importances_'):
                importances = current_model.feature_importances_
            else:
                importances = np.abs(current_model.coef_[0])

            # Draw on an explicit Axes rather than pyplot's implicit
            # "current figure", which is shared global state.
            fig_importance, ax_importance = plt.subplots(figsize=(10, 6))
            ax_importance.barh(feature_names, importances)
            ax_importance.set_title("Importance des Caractéristiques")
            _show_figure(fig_importance)

    with col4:
        # Correlation matrix of the training features.
        fig_corr, ax_corr = plt.subplots(figsize=(10, 8))
        sns.heatmap(X_train.corr(), annot=True, cmap='coolwarm', center=0, ax=ax_corr)
        ax_corr.set_title("Matrice de Corrélation")
        _show_figure(fig_corr)


if __name__ == "__main__":
    app()