Spaces:

Aliou12
/

streamlit-app

Sleeping

App Files Community

Aliou12 committed on Jan 30

Commit

9747b16

1 Parent(s): adf12c1

new

Browse files

Files changed (1) hide show

app.py +19 -5

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import scipy.stats as stats
 import statsmodels.api as sm
 import statsmodels.formula.api as smf
 from sklearn.cluster import KMeans
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 # 🏠 Titre de l'application
@@ -43,11 +44,17 @@ if uploaded_file is not None:
     # 🔹 Test de normalité des résidus (Shapiro-Wilk)
     model = smf.ols('Rating ~ C(Product_line) * C(Payment)', data=data).fit()
     residuals = model.resid
-    shapiro_test = stats.shapiro(residuals)
     st.write(f"✅ Test de Shapiro-Wilk (Normalité) : **p-value = {shapiro_test.pvalue:.4f}**")
     # 🔹 Test d'homogénéité des variances (Levene)
-    group_list = [data['Rating'][data['Product_line'] == cat] for cat in data['Product_line'].unique()]
     levene_test = stats.levene(*group_list)
     st.write(f"✅ Test de Levene (Homogénéité des variances) : **p-value = {levene_test.pvalue:.4f}**")
@@ -64,8 +71,12 @@ if uploaded_file is not None:
     # ============================
     st.subheader("📌 Comparaisons Post-Hoc (Tukey HSD)")
-    tukey = pairwise_tukeyhsd(data['Rating'], data['Product_line'])
-    st.write(tukey.summary())
     # ============================
     # 📊 Visualisation des Résultats
@@ -98,8 +109,11 @@ if uploaded_file is not None:
     # ============================
     st.subheader("🎯 Clustering des Clients (K-Means)")
     kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
-    data['Cluster'] = kmeans.fit_predict(data[['Rating']])
     # 🔹 Visualisation du Clustering
     fig, ax = plt.subplots(figsize=(8, 5))

 import statsmodels.api as sm
 import statsmodels.formula.api as smf
 from sklearn.cluster import KMeans
+from sklearn.preprocessing import LabelEncoder
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 # 🏠 Titre de l'application
     # 🔹 Test de normalité des résidus (Shapiro-Wilk)
     model = smf.ols('Rating ~ C(Product_line) * C(Payment)', data=data).fit()
     residuals = model.resid
+    if len(residuals) > 5000:
+        residuals_sample = residuals.sample(5000, random_state=42)
+    else:
+        residuals_sample = residuals
+    shapiro_test = stats.shapiro(residuals_sample)
     st.write(f"✅ Test de Shapiro-Wilk (Normalité) : **p-value = {shapiro_test.pvalue:.4f}**")
     # 🔹 Test d'homogénéité des variances (Levene)
+    group_list = [group.dropna().values for _, group in data.groupby('Product_line')['Rating']]
     levene_test = stats.levene(*group_list)
     st.write(f"✅ Test de Levene (Homogénéité des variances) : **p-value = {levene_test.pvalue:.4f}**")
     # ============================
     st.subheader("📌 Comparaisons Post-Hoc (Tukey HSD)")
+    if np.issubdtype(data['Rating'].dtype, np.number):
+        tukey = pairwise_tukeyhsd(data['Rating'], data['Product_line'])
+        st.write(tukey.summary())
+    else:
+        st.error("Erreur : La colonne 'Rating' doit être numérique pour le test de Tukey.")
     # ============================
     # 📊 Visualisation des Résultats
     # ============================
     st.subheader("🎯 Clustering des Clients (K-Means)")
+    encoder = LabelEncoder()
+    data['Product_line_encoded'] = encoder.fit_transform(data['Product_line'])
     kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
+    data['Cluster'] = kmeans.fit_predict(data[['Rating', 'Product_line_encoded']])
     # 🔹 Visualisation du Clustering
     fig, ax = plt.subplots(figsize=(8, 5))