import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO  # In-memory buffer wrapping the uploaded file's bytes

try:
    import scipy.stats  # Optional: enables Spearman correlation
    SCIPY_AVAILABLE = True
except ImportError:
    # No warning is emitted here: the main script shows it only after a file
    # has been uploaded, so it never appears on the empty landing page.
    SCIPY_AVAILABLE = False

# --- Configuration ---
st.set_page_config(layout="wide", page_title="Dashboard Analisi Clima")

# --- Constants & Helper Functions ---
# Sentiment buckets as (low, high) -> label, listed in ascending order.
# categorize_score() relies on that order: a score lying exactly on a shared
# boundary (2.5 or 4.5) is assigned to the first, i.e. lower, bucket.
SCORE_BUCKETS = {
    (0, 2.5): "Critico",
    (2.5, 4.5): "Neutrale",
    (4.5, 7): "Positivo",  # Scale assumed to end at 6; 7 just covers everything > 4.5
}
BUCKET_COLORS = {"Critico": "#d62728", "Neutrale": "#ff7f0e", "Positivo": "#2ca02c"}
THRESHOLD_LOW = 3.0  # Slightly adjusted for the bullet chart
THRESHOLD_HIGH = 4.5  # Slightly adjusted for the bullet chart
PLOTLY_TEMPLATE = "plotly_white"  # Alternatives: "seaborn", "plotly_dark", "ggplot2"

# Encodings tried in order when reading the CSV. latin-1 maps every byte to a
# character, so it cannot raise UnicodeDecodeError and acts as the catch-all.
_CSV_ENCODINGS = ("utf-8", "latin-1")

# Value returned by load_and_prepare_data() when nothing usable was loaded.
_LOAD_FAILED = (None,) * 7

# Names given to the first (up to) three columns, in positional order.
_DEMOGRAPHIC_NAMES = ("Genere", "Fascia_Eta", "Sede")

# 'Sede' values (compared lower-cased) that mark summary rows, not respondents.
_SUMMARY_ROW_LABELS = [
    label.lower()
    for label in ['Media', 'Mediana', 'Media sezione', 'Totale', 'Scarto quadratico medio']
]


def categorize_score(score):
    """Return the sentiment label for a single numeric score.

    Missing values map to "Non Risposto"; scores outside every range in
    SCORE_BUCKETS map to "Sconosciuto".
    """
    if pd.isna(score):
        return "Non Risposto"
    # First match wins, so boundary values fall into the lower bucket.
    for (low, high), label in SCORE_BUCKETS.items():
        if low <= score <= high:
            return label
    return "Sconosciuto"  # Not expected for numeric data within the survey scale


def _read_survey_csv(raw_bytes):
    """Parse semicolon-delimited CSV bytes, trying each encoding in _CSV_ENCODINGS."""
    last_error = None
    for encoding in _CSV_ENCODINGS:
        try:
            return pd.read_csv(BytesIO(raw_bytes), delimiter=';', encoding=encoding)
        except UnicodeDecodeError as exc:
            last_error = exc
    raise last_error


def _unique_column_names(names):
    """Return `names` with repeats renamed to name_1, name_2, ... and no collisions.

    The suffix keeps increasing until the candidate is unused, so a generated
    name can never clash with a column that literally carries that title.
    """
    counts = {}
    seen = set()
    unique = []
    for name in names:
        candidate = name
        while candidate in seen:
            counts[name] = counts.get(name, 0) + 1
            candidate = f"{name}_{counts[name]}"
        seen.add(candidate)
        unique.append(candidate)
    return unique


def _infer_category(original_col_name, potential_category_source):
    """Derive a category label from a CSV header-row column name.

    The text before the first '.' is used, unless the name is purely numeric
    or contains 'domanda'. Returns "Categoria Sconosciuta" when no rule applies.
    """
    col_name_str = str(original_col_name).strip()
    source_str = str(potential_category_source).strip()
    if (pd.notna(potential_category_source)
            and not source_str.isdigit()
            and 'domanda' not in source_str.lower()):
        base_name = source_str.split('.')[0].strip()
        if base_name:
            return base_name
    if '.' in col_name_str:
        base_name = col_name_str.split('.')[0].strip()
        suffix = col_name_str.split('.')[-1]
        if suffix.isdigit() and base_name:
            return base_name
    elif not col_name_str.isdigit() and 'domanda' not in col_name_str.lower():
        if col_name_str:
            return col_name_str
    return "Categoria Sconosciuta"


# Takes the uploaded file object (anything exposing getvalue()) instead of a path.
@st.cache_data
def load_and_prepare_data(uploaded_file_object):
    """Load the uploaded survey CSV and prepare it for the dashboard.

    The CSV header row is used to infer question categories, while the first
    data row supplies the actual column titles. The first three columns are
    renamed to 'Genere', 'Fascia_Eta' and 'Sede'; all remaining columns are
    treated as questions and converted to numbers.

    Args:
        uploaded_file_object: object whose getvalue() returns the CSV bytes,
            or None when nothing has been uploaded.

    Returns:
        A 7-tuple (df, demographic_cols, question_cols,
        question_to_category_map, numeric_question_cols, response_scale,
        overall_satisfaction_question). Every element is None when the file is
        missing, unreadable, or has no rows below the CSV header.
    """
    if uploaded_file_object is None:
        return _LOAD_FAILED

    try:
        df_orig = _read_survey_csv(uploaded_file_object.getvalue())
    except Exception as e:
        st.error(f"Errore durante la lettura del CSV caricato: {e}")
        return _LOAD_FAILED

    # Columns without a name in the CSV header row are discarded.
    unnamed_cols = [col for col in df_orig.columns if str(col).startswith('Unnamed:')]
    df = df_orig.drop(columns=unnamed_cols)
    csv_columns = df.columns.tolist()

    # Without at least one row there is no title row to promote to header.
    if df.shape[0] == 0:
        st.error("Il file CSV caricato non contiene righe di dati dopo l'intestazione.")
        return _LOAD_FAILED

    # Promote the first data row to header and keep the rest as responses.
    header_values = df.iloc[0].tolist()
    df = df.iloc[1:].reset_index(drop=True)

    # Clean the titles: blanks fall back to the CSV column name, then to a placeholder.
    cleaned_header = []
    for i, (value, csv_name) in enumerate(zip(header_values, csv_columns)):
        title = str(value).strip() if pd.notna(value) else ""
        if not title:
            csv_name = str(csv_name)
            if not csv_name.startswith('Unnamed:'):
                title = csv_name.strip()
            title = title or f"Colonna_Sconosciuta_{i}"
        cleaned_header.append(title)

    final_header = _unique_column_names(cleaned_header)
    df.columns = final_header

    # --- Category Mapping ---
    # Every column after the demographic ones is a question; its category comes
    # from the CSV header-row name, the only category source available.
    n_demo = len(_DEMOGRAPHIC_NAMES)
    question_to_category_map = {}
    for final_col_name, csv_name in zip(final_header[n_demo:], csv_columns[n_demo:]):
        category = _infer_category(csv_name, csv_name)
        category = category.replace("Parità di genere", "Parità Genere")
        question_to_category_map[final_col_name] = category

    # --- Demographic Columns ---
    # zip() stops at the shorter input, so files with fewer than three columns
    # simply get fewer demographic columns.
    demographic_map = dict(zip(final_header, _DEMOGRAPHIC_NAMES))
    df = df.rename(columns=demographic_map)
    demographic_cols = list(demographic_map.values())

    # Filter out summary rows; copy so later column assignments do not write
    # into a slice of the unfiltered frame.
    if 'Sede' in df.columns:
        is_summary = df['Sede'].astype(str).str.strip().str.lower().isin(_SUMMARY_ROW_LABELS)
        df = df[~is_summary].copy()

    # Fill missing demographic data. fillna() covers values still missing after
    # the string cast; replace() covers missing values rendered as text.
    for col in demographic_cols:
        if col in df.columns:
            df[col] = (
                df[col].astype(str)
                .fillna('Non specificato')
                .replace(['nan', 'None', ''], 'Non specificato')
            )

    # Question columns, identified by their final unique names.
    question_cols = [col for col in question_to_category_map if col in df.columns]

    # --- Type Conversion ---
    for col in question_cols:
        # Any non-numeric column (object or dedicated string dtype) may use a
        # decimal comma, so normalise it before parsing.
        if not pd.api.types.is_numeric_dtype(df[col]):
            as_text = df[col].astype(str).str.replace(',', '.', regex=False)
            df[col] = as_text.replace(['nan', 'N/A', '', '-', 'None'], np.nan, regex=False)
        df[col] = pd.to_numeric(df[col], errors='coerce')
    numeric_question_cols = df[question_cols].select_dtypes(include=np.number).columns.tolist()

    # Determine the response scale from the data, defaulting to 1-6.
    response_scale = (1, 6)
    if numeric_question_cols:
        # Ignore respondents who answered no numeric question at all.
        answered = df[numeric_question_cols].dropna(how='all')
        if not answered.empty:
            min_val = answered.min(skipna=True).min(skipna=True)
            max_val = answered.max(skipna=True).max(skipna=True)
            if pd.notna(min_val) and pd.notna(max_val):
                response_scale = (min_val, max_val)

    # --- Identify Overall Satisfaction Question ---
    overall_satisfaction_question = None
    possible_satisfaction_cats = ['Riepilogo', 'Generale', 'Soddisfazione Complessiva']
    possible_satisfaction_cols = [
        q for q in numeric_question_cols
        if question_to_category_map.get(q) in possible_satisfaction_cats
    ]
    if possible_satisfaction_cols:
        overall_satisfaction_question = possible_satisfaction_cols[0]
    else:
        keywords = ['soddisfazione', 'complessivamente', 'generale', 'valutazione']
        for q in numeric_question_cols:
            # Match on the title without a trailing numeric "_N" suffix.
            base, sep, suffix = q.rpartition('_')
            original_cleaned_name = base if sep and suffix.isdigit() else q
            q_check = original_cleaned_name.lower()
            if any(keyword in q_check for keyword in keywords):
                overall_satisfaction_question = q  # Keep the final unique name
                st.info(f"Domanda soddisfazione generale identificata: '{q}' (basata su '{original_cleaned_name}')")
                break

    if not overall_satisfaction_question and numeric_question_cols:
        st.warning("Impossibile identificare automaticamente la domanda sulla soddisfazione generale. Alcune analisi potrebbero essere limitate.")

    return df, demographic_cols, question_cols, question_to_category_map, numeric_question_cols, response_scale, overall_satisfaction_question


# NOTE(review): the main script below is still collapsed onto a few long lines; it is reproduced unchanged.
# --- Inizio Script Principale --- # Aggiungi il widget per caricare il file st.sidebar.title('Sondaggio') uploaded_file = st.sidebar.file_uploader("Carica il tuo file CSV", type="csv") st.sidebar.divider() # Procedi solo se un file è stato caricato if uploaded_file is not None: # Sposta l'avviso della libreria scipy qui, così appare solo se si procede if not SCIPY_AVAILABLE: st.sidebar.warning("Libreria 'scipy' non trovata. La correlazione Spearman non sarà disponibile. Installa con: pip install scipy") # --- Load Data --- # Chiama la funzione di caricamento passando l'oggetto file caricato try: df_full, demographic_cols, question_cols, question_to_category_map, numeric_question_cols, response_scale, overall_satisfaction_question = load_and_prepare_data(uploaded_file) if df_full is None: st.error("Caricamento o preparazione dati fallito. 
Controlla il file CSV.") st.stop() # Ferma l'esecuzione se il caricamento fallisce elif df_full.empty: st.warning("Il file CSV caricato risulta vuoto dopo la pulizia iniziale.") # Si potrebbe fermare qui o continuare mostrando avvisi di dati vuoti # st.stop() except Exception as e: st.error(f"Errore critico durante l'inizializzazione dei dati dal file caricato: {e}") st.exception(e) # Stampa traceback completo per debug st.stop() # Ferma l'esecuzione in caso di errore critico # --- DA QUI IN POI, IL CODICE DEL DASHBOARD RIMANE INVARIATO --- # --- MA VIENE ESEGUITO SOLO SE uploaded_file IS NOT None --- # --- App Title --- st.title("🚀 Dashboard Analisi Clima") # ============================================================================== # --- Sidebar --- # ============================================================================== st.sidebar.title("Filtri & Controlli") st.sidebar.subheader("👤 Filtri Demografici") selected_filters = {} if demographic_cols: # Use df_full for filter options to show all possibilities for demo_col in demographic_cols: # Ensure the column exists in df_full before creating filter if demo_col in df_full.columns: unique_values = sorted(df_full[demo_col].astype(str).unique()) if len(unique_values) > 1: selected_filters[demo_col] = st.sidebar.multiselect( f"{demo_col}", options=unique_values, default=unique_values ) else: # If only one value, no need for multiselect, just store it selected_filters[demo_col] = unique_values else: st.sidebar.warning(f"Colonna demografica '{demo_col}' definita ma non trovata nel DataFrame.") # Apply filters - start from df_full each time filters change df_filtered = df_full.copy() for col, selected_values in selected_filters.items(): # Check if the column exists in df_filtered before applying the filter if col in df_filtered.columns and selected_values: # Ensure selected_values are strings for comparison if the column is string if df_filtered[col].dtype == 'object': selected_values_str = [str(v) for v in 
selected_values] df_filtered = df_filtered[df_filtered[col].astype(str).isin(selected_values_str)] else: # Keep original type for non-object columns if filtering is needed df_filtered = df_filtered[df_filtered[col].isin(selected_values)] else: st.sidebar.warning("Nessuna colonna demografica valida trovata per i filtri.") df_filtered = df_full.copy() if df_full is not None else pd.DataFrame() # Use full data if available, else empty st.sidebar.divider() st.sidebar.subheader("📊 Metriche Chiave (Filtrate)") # Recalculate total respondents after filtering total_respondents_filtered = len(df_filtered) if df_filtered is not None else 0 st.sidebar.metric("Rispondenti Filtrati", total_respondents_filtered) # --- Calculate metrics only if df_filtered is not empty --- avg_overall_filtered = np.nan avg_scores_per_category_f = pd.Series(dtype=float) driver_df = pd.DataFrame() # Initialize empty driver dataframe # Default correlation method corr_method_sidebar = 'pearson' if SCIPY_AVAILABLE: corr_method_sidebar = 'spearman' # Prefer Spearman if scipy is available if df_filtered is not None and not df_filtered.empty and numeric_question_cols: # Ensure overall satisfaction question exists in the filtered numeric columns if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns and pd.api.types.is_numeric_dtype(df_filtered[overall_satisfaction_question]): overall_sat_data = df_filtered[overall_satisfaction_question].dropna() if not overall_sat_data.empty: avg_overall_filtered = overall_sat_data.mean() midpoint = (response_scale[0] + response_scale[1]) / 2 if response_scale else 3.5 # Fallback midpoint delta_vs_mid = avg_overall_filtered - midpoint st.sidebar.metric("Soddisfazione Generale Media", f"{avg_overall_filtered:.2f}", f"{delta_vs_mid:+.2f} vs Midpoint ({midpoint:.1f})") else: st.sidebar.metric("Soddisfazione Generale Media", "N/D (no data)") else: st.sidebar.metric("Soddisfazione Generale Media", "N/D (Domanda non trovata/valida)") # 
Calculate category averages on filtered data numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns] if numeric_cols_in_filtered: avg_scores_per_question_f = df_filtered[numeric_cols_in_filtered].mean(axis=0, skipna=True) df_avg_scores_f = pd.DataFrame({'Domanda': avg_scores_per_question_f.index, 'Punteggio Medio': avg_scores_per_question_f.values}) df_avg_scores_f['Categoria'] = df_avg_scores_f['Domanda'].map(question_to_category_map).fillna("Senza Categoria") df_avg_scores_f.dropna(subset=['Punteggio Medio'], inplace=True) if not df_avg_scores_f.empty: # Exclude "Senza Categoria" from min/max display if desired avg_scores_valid_cat = df_avg_scores_f[df_avg_scores_f['Categoria'] != "Senza Categoria"] if not avg_scores_valid_cat.empty: avg_scores_per_category_f = avg_scores_valid_cat.groupby('Categoria')['Punteggio Medio'].mean().sort_values() if not avg_scores_per_category_f.empty: min_cat_score = avg_scores_per_category_f.iloc[0] max_cat_score = avg_scores_per_category_f.iloc[-1] delta_min = f"{min_cat_score - avg_overall_filtered:.2f} vs Sod. Gen." if not np.isnan(avg_overall_filtered) else None delta_max = f"{max_cat_score - avg_overall_filtered:.2f} vs Sod. Gen." if not np.isnan(avg_overall_filtered) else None st.sidebar.metric(f"⚠️ Cat. Punteggio MIN", f"{avg_scores_per_category_f.index[0]} ({min_cat_score:.2f})", delta_min, delta_color="inverse") st.sidebar.metric(f"✅ Cat. 
Punteggio MAX", f"{avg_scores_per_category_f.index[-1]} ({max_cat_score:.2f})", delta_max, delta_color="normal") else: st.sidebar.text("N/D per Categorie (Vuote dopo agg.)") else: st.sidebar.text("N/D per Categorie (Solo 'Senza Cat.')") else: st.sidebar.text("N/D per Categorie (No medie domande)") else: st.sidebar.text("N/D per Categorie (No colonne numeriche)") # --- Calculate Driver Data (Correlation) --- if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns and pd.api.types.is_numeric_dtype(df_filtered[overall_satisfaction_question]): # Ensure overall satisfaction has variance if df_filtered[overall_satisfaction_question].nunique(dropna=True) > 1: driver_candidate_cols = [col for col in numeric_cols_in_filtered if col != overall_satisfaction_question and df_filtered[col].nunique(dropna=True) > 1] if driver_candidate_cols: try: # Calculate correlations correlations = df_filtered[driver_candidate_cols].corrwith(df_filtered[overall_satisfaction_question], method=corr_method_sidebar).dropna() # Calculate average scores for the same candidates avg_scores_drivers = df_filtered[driver_candidate_cols].mean(skipna=True) # Combine into driver_df if not correlations.empty: driver_df = pd.DataFrame({'Correlazione': correlations}) # Add avg scores safely, aligning index driver_df = driver_df.join(avg_scores_drivers.rename('Punteggio Medio'), how='inner') # Inner join ensures only questions with both corr and avg score remain if not driver_df.empty: driver_df['Categoria'] = driver_df.index.map(question_to_category_map).fillna("Senza Categoria") driver_df.dropna(subset=['Categoria', 'Correlazione', 'Punteggio Medio'], inplace=True) # Drop if essential data missing if not driver_df.empty: driver_df['Domanda'] = driver_df.index driver_df['Domanda_Breve'] = driver_df['Domanda'].apply(lambda x: str(x)[:47] + "..." 
if len(str(x)) > 50 else str(x)) driver_df['Correlazione_Abs'] = driver_df['Correlazione'].abs() else: driver_df = pd.DataFrame() # Ensure it's empty if join fails else: st.sidebar.info("Nessuna correlazione significativa calcolata per i driver.") except Exception as e: st.sidebar.warning(f"Errore nel calcolo correlazioni driver: {e}") else: st.sidebar.info("Nessuna domanda candidata (con varianza) trovata per l'analisi driver.") else: st.sidebar.info("La domanda di soddisfazione generale non ha varianza nei dati filtrati.") else: # If df_filtered is empty or no numeric questions st.sidebar.text("Dati insufficienti o non disponibili per le metriche.") if total_respondents_filtered == 0: st.sidebar.text("Nessun rispondente selezionato.") st.sidebar.metric("Soddisfazione Generale Media", "N/D") st.sidebar.text("N/D per Categorie") st.sidebar.divider() st.sidebar.info("Utilizza i filtri per esplorare i dati. Le metriche e i grafici si aggiornano dinamicamente.") # ============================================================================== # --- Create Tabs --- # ============================================================================== tab_list = [ "🎯 Sintesi Chiave", "🗺️ Mappa Domande", # New Tab for Question Map "👥 Demografia Dettagliata", "📊 Generale & Categorie", "🔍 Confronti & Driver", "📈 Grafici Avanzati" ] tabs = st.tabs(tab_list) # Assign tabs to variables dynamically for easier access tab_summary = tabs[0] tab_map = tabs[1] tab_demo = tabs[2] tab_overall = tabs[3] tab_comp = tabs[4] tab_advanced = tabs[5] # ============================================================================== # --- TAB Summary: Key Takeaways --- # ============================================================================== with tab_summary: # Content remains largely the same, but relies on variables calculated in sidebar st.header("🎯 Sintesi Chiave (Basata sui Filtri Correnti)") if df_filtered is None or df_filtered.empty: st.warning("Nessun dato disponibile con i filtri 
selezionati.") else: st.markdown(f"Analisi basata su **{total_respondents_filtered}** rispondenti.") col_s1, col_s2, col_s3 = st.columns([2, 1, 1]) # Adjusted columns for gauge with col_s1: st.subheader("Punti Salienti:") if not np.isnan(avg_overall_filtered): max_scale = response_scale[1] if response_scale else 6 # Fallback max scale st.markdown(f"- **Soddisfazione Generale:** {avg_overall_filtered:.2f} / {max_scale:.0f}") else: st.markdown(f"- **Soddisfazione Generale:** N/D") if not avg_scores_per_category_f.empty: st.markdown(f"- **Area Più Forte:** {avg_scores_per_category_f.index[-1]} (Media: {avg_scores_per_category_f.iloc[-1]:.2f})") st.markdown(f"- **Area Più Debole:** {avg_scores_per_category_f.index[0]} (Media: {avg_scores_per_category_f.iloc[0]:.2f})") else: st.markdown("- Dati categorie non disponibili.") # Driver info from pre-calculated driver_df if not driver_df.empty: try: # Top positive driver top_driver = driver_df.sort_values('Correlazione', ascending=False).iloc[0] st.markdown(f"- **Driver Positivo Principale:** {top_driver['Domanda_Breve']} (Corr: {top_driver['Correlazione']:.2f})") # Top area for improvement (high correlation, low score) - using dynamic means avg_corr_summary = driver_df['Correlazione'].mean() avg_score_summary = driver_df['Punteggio Medio'].mean() potential_improvement_df = driver_df[(driver_df['Correlazione'] > avg_corr_summary) & (driver_df['Punteggio Medio'] < avg_score_summary)] if not potential_improvement_df.empty: potential_improvement = potential_improvement_df.sort_values('Punteggio Medio').iloc[0] # Lowest score among high-impact, low-perf st.markdown(f"- **Focus Miglioramento:** {potential_improvement['Domanda_Breve']} (Score: {potential_improvement['Punteggio Medio']:.2f}, Corr: {potential_improvement['Correlazione']:.2f})") else: st.markdown("- *Focus Miglioramento:* (Nessun driver critico trovato con medie correnti)") except IndexError: st.markdown("- *Driver Principali:* (Errore nell'accesso ai dati driver)") 
except Exception as e: st.markdown(f"- *Driver Principali:* (Errore: {e})") else: st.markdown("- *Driver Principali:* (Dati non disponibili o insufficienti)") with col_s2: st.subheader("Sentiment") # Combined Pie and Gauge if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns: overall_satisfaction_data_f = df_filtered[overall_satisfaction_question].dropna() if pd.api.types.is_numeric_dtype(overall_satisfaction_data_f) and not overall_satisfaction_data_f.empty: # Sentiment Pie Chart bucket_counts = overall_satisfaction_data_f.apply(categorize_score).value_counts() # Add 'Non Risposto' if it exists # non_risposto_count = df_filtered[overall_satisfaction_question].isna().sum() # Needs careful handling if mixing counts and percentages bucket_counts = bucket_counts.reindex(list(BUCKET_COLORS.keys()) + ["Non Risposto"], fill_value=0) # Ensure all buckets + Non Risposto bucket_perc = (bucket_counts / bucket_counts.sum() * 100) if bucket_counts.sum() > 0 else bucket_counts # Define colors including for "Non Risposto" plot_colors = BUCKET_COLORS.copy() plot_colors["Non Risposto"] = "#bbbbbb" # Grey for non-responded fig_sentiment_pie = px.pie(values=bucket_perc.values, names=bucket_perc.index, title="Distribuzione Sentiment", hole=0.4, color=bucket_perc.index, color_discrete_map=plot_colors, template=PLOTLY_TEMPLATE) fig_sentiment_pie.update_traces(textinfo='percent+label', sort=False, # Keep defined order pull=[0.05 if name=="Critico" else 0 for name in bucket_perc.index]) fig_sentiment_pie.update_layout(showlegend=False, margin=dict(t=30, b=10, l=10, r=10), height=250) # Compact layout st.plotly_chart(fig_sentiment_pie, use_container_width=True) else: st.write("Dati soddisfazione non numerici/vuoti.") else: st.write("Domanda soddisfazione non trovata.") with col_s3: st.subheader("Valore Medio") if not np.isnan(avg_overall_filtered): min_scale, max_scale = response_scale if response_scale else (1, 6) midpoint = (min_scale + max_scale) / 
2 fig_gauge = go.Figure(go.Indicator( mode = "gauge+number", value = avg_overall_filtered, domain = {'x': [0, 1], 'y': [0, 1]}, title = {'text': "Soddisfazione Generale", 'font': {'size': 16}}, gauge = { 'axis': {'range': [min_scale, max_scale], 'tickwidth': 1, 'tickcolor': "darkblue"}, 'bar': {'color': "steelblue"}, 'bgcolor': "white", 'borderwidth': 2, 'bordercolor': "gray", 'steps': [ {'range': [min_scale, THRESHOLD_LOW], 'color': BUCKET_COLORS['Critico']}, {'range': [THRESHOLD_LOW, THRESHOLD_HIGH], 'color': BUCKET_COLORS['Neutrale']}, {'range': [THRESHOLD_HIGH, max_scale], 'color': BUCKET_COLORS['Positivo']}], 'threshold': { 'line': {'color': "black", 'width': 3}, 'thickness': 0.9, 'value': midpoint } # Show midpoint })) fig_gauge.update_layout(height=250, margin=dict(t=40, b=10, l=10, r=10)) # Compact layout st.plotly_chart(fig_gauge, use_container_width=True) else: st.write(" ") # Placeholder st.write(" ") st.info("Gauge non disponibile (media N/D).") st.markdown("---") st.subheader("Riflessioni Rapide:") satisfaction_text = f"{avg_overall_filtered:.2f}" if not np.isnan(avg_overall_filtered) else "N/D" strongest_area_text = f"{avg_scores_per_category_f.index[-1]}" if not avg_scores_per_category_f.empty else "N/D" weakest_area_text = f"{avg_scores_per_category_f.index[0]}" if not avg_scores_per_category_f.empty else "N/D" st.info(f""" Questa sintesi evidenzia i risultati principali per il gruppo selezionato ({total_respondents_filtered} persone). La soddisfazione generale si attesta a **{satisfaction_text}**. Le aree di forza (**{strongest_area_text}**) e di debolezza (**{weakest_area_text}**) richiedono attenzione specifica. Esplora le altre schede per dettagli, confronti e visualizzazioni avanzate. 
""") # ============================================================================== # --- TAB Map: Category -> Question Mapping --- # ============================================================================== with tab_map: st.header("🗺️ Mappa Categorie e Domande") st.write("Questa sezione mostra quali domande appartengono a ciascuna categoria identificata durante il caricamento dei dati.") if question_to_category_map: # Create DataFrame from the mapping dictionary map_df = pd.DataFrame(question_to_category_map.items(), columns=['Domanda', 'Categoria']) # Sort for better readability map_df = map_df.sort_values(by=['Categoria', 'Domanda']).reset_index(drop=True) st.dataframe(map_df, use_container_width=True) # Optional: Display grouped by category st.divider() st.subheader("Domande Raggruppate per Categoria") categories_in_map = map_df['Categoria'].unique() for category in sorted(categories_in_map): with st.expander(f"**{category}**"): questions_in_cat = map_df[map_df['Categoria'] == category]['Domanda'].tolist() for q in questions_in_cat: st.markdown(f"- {q}") else: st.warning("La mappa tra domande e categorie non è disponibile.") # ============================================================================== # --- TAB Demo: Demographics --- # ============================================================================== with tab_demo: st.header("👥 Analisi Demografica Dettagliata (Filtrata)") if df_filtered is None or df_filtered.empty: st.warning("Nessun dato disponibile con i filtri selezionati.") elif not demographic_cols: st.warning("Nessuna colonna demografica configurata per l'analisi.") else: st.write(f"Visualizzazione basata su **{len(df_filtered)}** rispondenti selezionati.") valid_demo_cols_plots = [col for col in demographic_cols if col in df_filtered.columns] # Use only valid cols for plotting if not valid_demo_cols_plots: st.warning("Nessuna colonna demografica valida trovata nei dati filtrati per la visualizzazione.") else: # --- Basic 
Distribution Pies --- st.subheader("Distribuzione Base") num_demo_cols = len(valid_demo_cols_plots) cols_pie = st.columns(num_demo_cols) pie_colors = [px.colors.qualitative.Pastel1, px.colors.qualitative.Pastel2, px.colors.qualitative.Set3] # Cycle through color schemes for i, demo_col in enumerate(valid_demo_cols_plots): with cols_pie[i % num_demo_cols]: # Cycle through columns if not df_filtered[demo_col].dropna().empty: # Define order for age if applicable category_orders = {} if 'Eta' in demo_col: age_order_guess = ['Fino a 30 anni', '31-40 anni', '41-50 anni', 'Oltre i 50 anni', 'Non specificato'] actual_ages = df_filtered[demo_col].unique() ordered_actual = [age for age in age_order_guess if age in actual_ages] ordered_actual.extend(sorted([age for age in actual_ages if age not in age_order_guess])) category_orders={demo_col: ordered_actual} fig_pie = px.pie(df_filtered.dropna(subset=[demo_col]), names=demo_col, hole=0.4, color_discrete_sequence=pie_colors[i % len(pie_colors)], template=PLOTLY_TEMPLATE, title=f"Per {demo_col}", category_orders=category_orders) fig_pie.update_traces(textposition='inside', textinfo='percent+label') fig_pie.update_layout(showlegend=False, title_x=0.5, margin=dict(t=40, b=0, l=0, r=0), height=300) st.plotly_chart(fig_pie, use_container_width=True) else: st.write(f"Dati '{demo_col}' non disponibili.") st.markdown("---") # --- Hierarchical Views: Sunburst & Treemap --- st.subheader("Visualizzazioni Gerarchiche/Proporzionali") if len(valid_demo_cols_plots) >= 2: # Need at least 2 demographics for interesting hierarchy chart_type_hier = st.radio("Scegli tipo grafico gerarchico:", ["Sunburst", "Treemap"], horizontal=True, key="hier_chart_sel") # Aggregate counts for combinations try: df_grouped_hier = df_filtered.groupby(valid_demo_cols_plots, observed=True).size().reset_index(name='Conteggio') if not df_grouped_hier.empty: # Use first valid demo col for coloring color_col_hier = valid_demo_cols_plots[0] if chart_type_hier == 
"Sunburst": fig_hier = px.sunburst(df_grouped_hier, path=valid_demo_cols_plots, values='Conteggio', title=f"Distribuzione Combinata (Sunburst): {', '.join(valid_demo_cols_plots)}", template=PLOTLY_TEMPLATE, color=color_col_hier, color_discrete_sequence=px.colors.qualitative.Pastel) fig_hier.update_layout(margin=dict(t=50, l=25, r=25, b=25)) st.plotly_chart(fig_hier, use_container_width=True) elif chart_type_hier == "Treemap": fig_hier = px.treemap(df_grouped_hier, path=[px.Constant("Tutti")] + valid_demo_cols_plots, values='Conteggio', title=f"Distribuzione Combinata (Treemap): {', '.join(valid_demo_cols_plots)}", template=PLOTLY_TEMPLATE, color=color_col_hier, color_discrete_sequence=px.colors.qualitative.Pastel) fig_hier.update_layout(margin=dict(t=50, l=25, r=25, b=25)) st.plotly_chart(fig_hier, use_container_width=True) else: st.info("Nessun dato aggregato per la visualizzazione gerarchica.") except Exception as e: st.error(f"Errore durante l'aggregazione per il grafico gerarchico: {e}") else: st.info("Sono necessarie almeno due colonne demografiche valide per le visualizzazioni gerarchiche.") # ============================================================================== # --- TAB Overall: Overall, Categories & Questions --- # ============================================================================== with tab_overall: st.header("📊 Analisi Generale, Categorie e Domande (Filtrata)") if df_filtered is None or df_filtered.empty: st.warning("Nessun dato disponibile con i filtri selezionati.") else: # --- Overall Satisfaction Distribution --- st.subheader("⭐ Soddisfazione Generale Complessiva") if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns: overall_satisfaction_data_f = df_filtered[overall_satisfaction_question].dropna() if pd.api.types.is_numeric_dtype(overall_satisfaction_data_f) and not overall_satisfaction_data_f.empty: col_ov1, col_ov2 = st.columns([2,1]) with col_ov1: # Bar chart of distribution overall_counts_f 
= overall_satisfaction_data_f.value_counts().sort_index() fig_overall_satisfaction = px.bar(overall_counts_f, x=overall_counts_f.index, y=overall_counts_f.values, labels={'x': f'Punteggio ({response_scale[0]:.0f}-{response_scale[1]:.0f})', 'y': 'Numero Risposte'}, text_auto=True, color_discrete_sequence=px.colors.sequential.Blues_r, template=PLOTLY_TEMPLATE, title="Distribuzione Punteggi Soddisfazione Generale") fig_overall_satisfaction.update_layout(xaxis = dict(tickmode = 'linear', dtick=1), title_x=0.5) st.plotly_chart(fig_overall_satisfaction, use_container_width=True) with col_ov2: # Sentiment display st.write(" ") st.write(" ") st.write("**Distribuzione Sentiment:**") bucket_counts = overall_satisfaction_data_f.apply(categorize_score).value_counts() bucket_counts = bucket_counts.reindex(list(BUCKET_COLORS.keys()) + ["Non Risposto"], fill_value=0) total_valid_responses = bucket_counts.sum() if total_valid_responses > 0: bucket_perc = (bucket_counts / total_valid_responses * 100) plot_colors = BUCKET_COLORS.copy() plot_colors["Non Risposto"] = "#bbbbbb" for bucket in plot_colors.keys(): # Iterate in defined order if bucket in bucket_perc.index: # Check if bucket exists perc = bucket_perc.get(bucket, 0) count = bucket_counts.get(bucket, 0) st.markdown(f" **{bucket}:** {perc:.1f}% ({count})", unsafe_allow_html=True) else: st.write("Nessuna risposta valida per il sentiment.") else: st.warning("Dati soddisfazione generale non disponibili/numerici.") else: st.warning("Domanda soddisfazione generale non trovata.") st.markdown("---") # --- Category Averages --- st.subheader("📈 Punteggio Medio per Categoria") if not avg_scores_per_category_f.empty: cat_avg_chart_type = st.radio("Visualizza medie categorie come:", ["Bar Chart", "Bullet Chart"], horizontal=True, key="cat_avg_type") if cat_avg_chart_type == "Bar Chart": avg_scores_plot = avg_scores_per_category_f.copy() color_map = [] for score in avg_scores_plot.values: if score > THRESHOLD_HIGH: 
color_map.append(BUCKET_COLORS["Positivo"]) elif score < THRESHOLD_LOW: color_map.append(BUCKET_COLORS["Critico"]) else: color_map.append(BUCKET_COLORS["Neutrale"]) fig_avg_category = go.Figure(go.Bar( x=avg_scores_plot.values, y=avg_scores_plot.index, orientation='h', text=[f'{score:.2f}' for score in avg_scores_plot.values], marker_color=color_map )) fig_avg_category.update_traces(textposition='outside') fig_avg_category.update_layout( xaxis_title=f'Punteggio Medio ({response_scale[0]:.0f}-{response_scale[1]:.0f})', yaxis_title='Categoria', yaxis={'categoryorder':'total ascending'}, template=PLOTLY_TEMPLATE, title="Medie Categorie (Colorate per Soglia)") if not np.isnan(avg_overall_filtered): fig_avg_category.add_vline(x=avg_overall_filtered, line_width=2, line_dash="dash", line_color="grey", annotation_text="Media Sod. Gen.") st.plotly_chart(fig_avg_category, use_container_width=True) elif cat_avg_chart_type == "Bullet Chart": st.write("Grafico Bullet: Confronta la media di categoria con la media generale e le soglie.") min_scale, max_scale = response_scale if response_scale else (1, 6) avg_scores_plot = avg_scores_per_category_f.copy().sort_values(ascending=False) for category, score in avg_scores_plot.items(): fig_bullet = go.Figure(go.Indicator( mode = "gauge+number+delta", value = score, delta = {'reference': avg_overall_filtered, 'suffix': ' vs Media Gen.'} if not np.isnan(avg_overall_filtered) else None, title = {'text': category, 'font': {'size': 14}}, gauge = { 'shape': "bullet", 'axis': {'range': [min_scale, max_scale]}, 'threshold': { 'line': {'color': "black", 'width': 2}, 'thickness': 0.75, 'value': avg_overall_filtered if not np.isnan(avg_overall_filtered) else (min_scale+max_scale)/2 }, 'bgcolor': "white", 'steps': [ {'range': [min_scale, THRESHOLD_LOW], 'color': BUCKET_COLORS['Critico']}, {'range': [THRESHOLD_LOW, THRESHOLD_HIGH], 'color': BUCKET_COLORS['Neutrale']}, {'range': [THRESHOLD_HIGH, max_scale], 'color': BUCKET_COLORS['Positivo']}], 
'bar': {'color': 'darkblue', 'thickness': 0.5} })) fig_bullet.update_layout(height=100, margin=dict(l=200, r=50, t=30, b=10)) st.plotly_chart(fig_bullet, use_container_width=True) else: st.warning("Impossibile calcolare medie per categoria (potrebbero essere tutte 'Senza Categoria' o vuote).") st.markdown("---") # --- Detailed Question Analysis --- st.subheader("❓ Analisi Dettagliata per Domanda") # Get categories present in the calculated averages categories_with_averages = avg_scores_per_category_f.index.unique().tolist() if not categories_with_averages: # Fallback: get categories from the original map if averages failed if question_to_category_map: categories_with_averages = sorted(list(set(question_to_category_map.values()))) if "Senza Categoria" in categories_with_averages: categories_with_averages.remove("Senza Categoria") if "Categoria Sconosciuta" in categories_with_averages: categories_with_averages.remove("Categoria Sconosciuta") else: categories_with_averages = [] if categories_with_averages: # Proceed only if there are valid categories col_q1, col_q2 = st.columns([1,1]) with col_q1: selected_category = st.selectbox("Seleziona Categoria:", options=categories_with_averages, key="cat_select_q") with col_q2: plot_type = st.radio("Tipo Grafico Domande:", ["Distribuzione % (Stacked)", "Conteggi (Bar)", "Box Plot"], horizontal=True, key="q_plot_type") if selected_category: st.write(f"**Dettaglio Domande: '{selected_category}'**") # Find questions mapped to the selected category, ensuring they are numeric and exist questions_in_category = [q for q, cat in question_to_category_map.items() if cat == selected_category and q in df_filtered.columns and q in numeric_question_cols] if not questions_in_category: st.write("Nessuna domanda numerica valida trovata per questa categoria nei dati filtrati.") else: # Prepare data for box plot if selected if plot_type == "Box Plot": df_box_cat = df_filtered[questions_in_category].copy() if not df_box_cat.empty: df_box_melted = 
df_box_cat.melt(var_name='Domanda', value_name='Punteggio') # Shorten question names for y-axis df_box_melted['Domanda_Breve'] = df_box_melted['Domanda'].apply(lambda x: x[:67]+"..." if len(x) > 70 else x) df_box_melted.dropna(subset=['Punteggio'], inplace=True) if not df_box_melted.empty: fig_box = px.box(df_box_melted, x='Punteggio', y='Domanda_Breve', orientation='h', title=f"Distribuzione Punteggi per Domanda in '{selected_category}'", template=PLOTLY_TEMPLATE, points=False) # points="all" can be noisy fig_box.update_layout(yaxis={'categoryorder':'total descending'}, height=max(400, len(questions_in_category)*50)) # Dynamic height st.plotly_chart(fig_box, use_container_width=True) else: st.warning("Nessun dato valido per il Box Plot dopo il dropna.") else: st.warning("DataFrame vuoto per il Box Plot.") else: # Stacked or Counts Bar Chart for question in questions_in_category: question_data_f = df_filtered[question].dropna() if pd.api.types.is_numeric_dtype(question_data_f) and not question_data_f.empty: avg_q = question_data_f.mean() q_display = question if len(question) < 100 else question[:97] + "..." 
st.markdown(f"**{q_display}** (Media: {avg_q:.2f})") if plot_type == "Conteggi (Bar)": counts_q = question_data_f.value_counts().sort_index() if not counts_q.empty: fig_q = px.bar(counts_q, x=counts_q.index, y=counts_q.values, labels={'x': 'Punteggio', 'y': 'Numero Risposte'}, text_auto='.2s', height=250, template=PLOTLY_TEMPLATE, color_discrete_sequence=px.colors.sequential.Blues_r) fig_q.update_layout(xaxis = dict(tickmode = 'linear', dtick=1), margin=dict(t=5, b=5, l=5, r=5)) st.plotly_chart(fig_q, use_container_width=True) else: st.caption("Nessun dato per questo grafico.") elif plot_type == "Distribuzione % (Stacked)": counts_q_norm = question_data_f.value_counts(normalize=True).sort_index() * 100 if not counts_q_norm.empty: counts_q_df = counts_q_norm.reset_index() counts_q_df.columns = ['Punteggio', 'Percentuale'] counts_q_df['Punteggio'] = counts_q_df['Punteggio'].astype(str) # For discrete colors # Define a color map for the scores in the stacked bar unique_scores = sorted(counts_q_df['Punteggio'].astype(float).unique()) colors = px.colors.sequential.Blues_r score_color_map = {str(score): colors[min(len(colors)-1, int((score - response_scale[0]) / (response_scale[1] - response_scale[0]) * len(colors)))] for score in unique_scores} fig_q = px.bar(counts_q_df, x='Percentuale', y=[' ']*len(counts_q_df), # Single bar color='Punteggio', orientation='h', text=[f"{p:.1f}%" for p in counts_q_df['Percentuale']], height=150, template=PLOTLY_TEMPLATE, color_discrete_map=score_color_map # Apply color map ) fig_q.update_layout(xaxis_ticksuffix="%", yaxis_title="", xaxis_title="% Rispondenti", legend_title="Punteggio", showlegend=True, margin=dict(t=5, b=5, l=5, r=5), xaxis_range=[0,100], yaxis_visible=False, legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)) fig_q.update_traces(textposition='inside', textfont_color='white') # Ensure text is visible st.plotly_chart(fig_q, use_container_width=True) else: st.caption("Nessun dato per questo 
grafico.") else: st.caption(f"Dati per '{question[:50]}...' non numerici o vuoti.") else: st.info("Nessuna categoria valida trovata per l'analisi dettagliata delle domande.") # ============================================================================== # --- TAB Comparisons: Comparisons, Drivers & More --- # ============================================================================== with tab_comp: st.header("🔍 Confronti Demografici & Analisi Driver (Filtrata)") if df_filtered is None or df_filtered.empty: st.warning("Nessun dato disponibile con i filtri selezionati.") elif not numeric_question_cols: st.warning("Nessuna domanda numerica trovata per le analisi di confronto.") else: # --- Prepare Melted Data --- @st.cache_data # Cache the melting process def get_melted_data(df, id_vars, value_vars, cat_map): if not value_vars: return pd.DataFrame() cols_to_melt = [col for col in id_vars + value_vars if col in df.columns] value_vars_valid = [col for col in value_vars if col in cols_to_melt] id_vars_valid = [col for col in id_vars if col in cols_to_melt] if not value_vars_valid or not id_vars_valid: return pd.DataFrame() # Need both ID and Value vars df_melted = df[cols_to_melt].melt(id_vars=id_vars_valid, value_vars=value_vars_valid, var_name='Domanda', value_name='Punteggio') df_melted['Categoria'] = df_melted['Domanda'].map(cat_map).fillna("Senza Categoria") df_melted.dropna(subset=['Punteggio'], inplace=True) return df_melted numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns] valid_demographic_cols = [col for col in demographic_cols if col in df_filtered.columns] df_melted_f = pd.DataFrame() # Initialize empty if valid_demographic_cols and numeric_cols_in_filtered: df_melted_f = get_melted_data(df_filtered, valid_demographic_cols, numeric_cols_in_filtered, question_to_category_map) # --- Demographic Comparisons (Violin / Box Plots) --- st.subheader("🎻 Confronti Demografici (Distribuzione Punteggi per Categoria)") if 
not df_melted_f.empty and valid_demographic_cols: col_comp1, col_comp2 = st.columns(2) with col_comp1: # Select demographic group for comparison comparison_group_v_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1] # Only those with multiple values if comparison_group_v_options: comparison_group_v = st.selectbox("Confronta Distribuzioni per:", comparison_group_v_options, key="dist_group") else: comparison_group_v = None st.info("Nessuna colonna demografica con valori multipli per il confronto.") with col_comp2: dist_plot_type = st.radio("Tipo Grafico Distribuzione:", ["Violin Plot", "Box Plot"], horizontal=True, key="dist_plot_type") if comparison_group_v: # Proceed only if a valid comparison group is selected # Select categories to show (use averages calculated in sidebar) categories_with_averages = avg_scores_per_category_f.index.unique().tolist() if categories_with_averages: default_cats_dist = avg_scores_per_category_f.nsmallest(3).index.tolist() default_cats_dist = [cat for cat in default_cats_dist if cat in categories_with_averages] # Ensure defaults are valid selected_cats_dist = st.multiselect("Seleziona Categorie da Visualizzare:", options=categories_with_averages, default=default_cats_dist, key="cat_dist") if selected_cats_dist: # Filter melted data for selected categories and ensure comparison group is not NA df_dist = df_melted_f[(df_melted_f['Categoria'].isin(selected_cats_dist)) & (df_melted_f[comparison_group_v].notna()) & (df_melted_f[comparison_group_v] != 'Non specificato')] # Exclude 'Non specificato'? Optional. if not df_dist.empty: # Ensure hover data columns exist hover_data = [col for col in valid_demographic_cols if col in df_dist.columns] plot_func = px.violin if dist_plot_type == "Violin Plot" else px.box caption_text = ("Il grafico a violino mostra la densità della distribuzione..." 
if dist_plot_type == "Violin Plot" else "Il box plot mostra mediana, quartili...") fig_dist = plot_func(df_dist, x='Categoria', y='Punteggio', color=comparison_group_v, points=False, # 'all', False, 'outliers' hover_data=hover_data, category_orders={'Categoria': selected_cats_dist}, # Use selected order template=PLOTLY_TEMPLATE, title=f"Distribuzione Punteggi per {comparison_group_v}") fig_dist.update_layout(yaxis_range=[response_scale[0]-0.5, response_scale[1]+0.5]) st.plotly_chart(fig_dist, use_container_width=True) st.caption(caption_text) else: st.warning(f"Nessun dato per le categorie e gruppo '{comparison_group_v}' selezionati.") else: st.info("Seleziona almeno una categoria per visualizzare il confronto.") else: st.warning("Medie per categoria non disponibili.") else: st.info("Dati o colonne demografiche insufficienti per i confronti.") st.markdown("---") # --- Driver Analysis --- st.subheader("🎯 Analisi Driver (Impatto vs Performance)") if not driver_df.empty: # Use pre-calculated driver_df from sidebar driver_plot_type = st.radio("Visualizza Analisi Driver come:", ["Scatter Plot", "Density Heatmap", "Bar Chart (Top/Bottom)"], horizontal=True, key="driver_plot_type") if driver_plot_type == "Scatter Plot": # (Code for Scatter Plot - seems okay, uses driver_df) fig_scatter_drivers = px.scatter(driver_df, x='Punteggio Medio', y='Correlazione', color='Categoria', size='Correlazione_Abs', size_max=18, hover_data=['Domanda_Breve', 'Punteggio Medio', 'Correlazione'], template=PLOTLY_TEMPLATE, title=f"Driver: Impatto (Corr. 
{corr_method_sidebar.capitalize()}) vs Performance") avg_corr = driver_df['Correlazione'].mean() avg_score_all_q = driver_df['Punteggio Medio'].mean() fig_scatter_drivers.add_vline(x=avg_score_all_q, line_width=1, line_dash="dash", line_color="grey", annotation_text="Media Perf.") fig_scatter_drivers.add_hline(y=avg_corr, line_width=1, line_dash="dash", line_color="grey", annotation_text="Media Impatto") fig_scatter_drivers.update_layout(xaxis_title="Performance (Punteggio Medio Domanda)", yaxis_title=f"Impatto (Corr. {corr_method_sidebar.capitalize()} con Sod. Gen.)") st.plotly_chart(fig_scatter_drivers, use_container_width=True) st.caption("Quadranti (vs medie): Alto Dx (Verde)=Forza Chiave; Alto Sx (Giallo)=Priorità Alta; Basso Sx (Rosso)=Priorità Bassa; Basso Dx (Blu)=Mantenimento Secondario. Dimensione = forza correlazione.") elif driver_plot_type == "Density Heatmap": # (Code for Density Heatmap - seems okay, uses driver_df) fig_density_driver = px.density_heatmap(driver_df, x="Punteggio Medio", y="Correlazione", marginal_x="histogram", marginal_y="histogram", text_auto=False, template=PLOTLY_TEMPLATE, title=f"Densità Driver: Impatto (Corr. {corr_method_sidebar.capitalize()}) vs Performance") avg_corr = driver_df['Correlazione'].mean() avg_score_all_q = driver_df['Punteggio Medio'].mean() fig_density_driver.add_vline(x=avg_score_all_q, line_width=1, line_dash="dash", line_color="grey") fig_density_driver.add_hline(y=avg_corr, line_width=1, line_dash="dash", line_color="grey") fig_density_driver.update_layout(xaxis_title="Performance (Punteggio Medio Domanda)", yaxis_title=f"Impatto (Corr. {corr_method_sidebar.capitalize()} con Sod. 
Gen.)") st.plotly_chart(fig_density_driver, use_container_width=True) st.caption("Mostra dove si concentrano le domande nel piano Impatto-Performance.") elif driver_plot_type == "Bar Chart (Top/Bottom)": # (Code for Bar Chart - seems okay, uses driver_df) top_n = st.slider("Numero Top/Bottom Driver da mostrare:", min_value=3, max_value=15, value=8, key="driver_topn") driver_df_unique = driver_df.loc[~driver_df.index.duplicated(keep='first')] top_drivers = driver_df_unique.sort_values('Correlazione', ascending=False).head(top_n) bottom_drivers = driver_df_unique.sort_values('Correlazione', ascending=True).head(top_n) # Gets most negative # Combine and ensure uniqueness (in case a driver is both top N pos and top N neg in small datasets) drivers_to_plot = pd.concat([top_drivers, bottom_drivers]).drop_duplicates().sort_values('Correlazione') if not drivers_to_plot.empty: fig_drivers_bar = px.bar(drivers_to_plot, x='Correlazione', y='Domanda_Breve', orientation='h', color='Categoria', template=PLOTLY_TEMPLATE, height=max(400, len(drivers_to_plot)*30), title=f"Top/Bottom {top_n} Domande per Correlazione ({corr_method_sidebar.capitalize()}) con Sod. Gen.") fig_drivers_bar.update_layout(yaxis={'categoryorder':'total ascending'}, xaxis_title=f"Correlazione {corr_method_sidebar.capitalize()}", yaxis_title="Domanda") st.plotly_chart(fig_drivers_bar, use_container_width=True) st.caption(f"Mostra le domande con la correlazione ({corr_method_sidebar}) più forte (positiva e negativa) con la soddisfazione generale.") else: st.warning("Nessun dato driver da mostrare nel grafico a barre.") else: st.warning("Impossibile calcolare l'analisi dei driver. 
Verifica la presenza e la varianza della domanda di soddisfazione generale e delle altre domande numeriche.") st.markdown("---") # --- Anomaly Detection & Recommendations --- st.subheader("⚠️ Rilevamento Potenziali Punti d'Attenzione & Suggerimenti 💡") # Use melted data calculated earlier if not df_melted_f.empty and valid_demographic_cols and not avg_scores_per_category_f.empty: col_anom, col_sugg = st.columns(2) with col_anom: st.write("**Possibili Punti d'Attenzione (Z-Score per Gruppo/Categoria):**") try: # Calculate overall category means and std deviations on the *filtered* dataset overall_cat_stats = df_melted_f.groupby('Categoria')['Punteggio'].agg(['mean', 'std']).reset_index() # Rename columns *before* merge overall_cat_stats = overall_cat_stats.rename(columns={'mean': 'mean_overall', 'std': 'std_overall'}) # Calculate group means within the filtered dataset group_means = df_melted_f.groupby(valid_demographic_cols + ['Categoria'], observed=True)['Punteggio'].mean().reset_index() # Rename columns *before* merge group_means = group_means.rename(columns={'Punteggio': 'mean_group'}) if not group_means.empty and not overall_cat_stats.empty: # Merge using the renamed columns merged_stats = pd.merge(group_means, overall_cat_stats, on='Categoria', how='left') # Calculate Z-score only if std is not NaN and greater than a small epsilon merged_stats_valid_std = merged_stats[merged_stats['std_overall'].notna() & (merged_stats['std_overall'] > 0.01)].copy() # Use copy to avoid SettingWithCopyWarning if not merged_stats_valid_std.empty: # *** CORRECTION HERE: Use correct column names *** merged_stats_valid_std['Z_Score'] = (merged_stats_valid_std['mean_group'] - merged_stats_valid_std['mean_overall']) / merged_stats_valid_std['std_overall'] z_score_threshold = st.slider("Soglia Z-Score per Attenzione:", min_value=1.0, max_value=3.0, value=1.75, step=0.25, key="zscore_thresh") potential_anomalies = merged_stats_valid_std[abs(merged_stats_valid_std['Z_Score']) > 
z_score_threshold].sort_values(by='Z_Score') if not potential_anomalies.empty: st.write(f"Gruppi/Categorie con punteggio medio deviante (> {z_score_threshold:.2f} dev. std. dalla media della categoria):") for _, row in potential_anomalies.head(10).iterrows(): # Limit display group_desc_parts = [f"{col}={row[col]}" for col in valid_demographic_cols] group_desc = " / ".join(group_desc_parts) direction = "⚠️ Basso" if row['Z_Score'] < 0 else "✅ Alto" # Use mean_group and Z_Score from the row st.markdown(f"- {direction}: **{group_desc}** in **'{row['Categoria']}'** (Media Gruppo: {row['mean_group']:.2f}, Z: {row['Z_Score']:.2f})") else: st.info(f"Nessun punto d'attenzione rilevato con soglia Z-Score > {z_score_threshold:.2f} nei dati filtrati.") else: st.info("Deviazione standard non calcolabile o nulla per le categorie, impossibile calcolare Z-score.") else: st.info("Dati insufficienti per calcolare medie di gruppo o statistiche di categoria.") except KeyError as e: st.error(f"Errore Chiave durante il calcolo Z-Score: '{e}'. 
Verifica i nomi delle colonne dopo il merge.") st.dataframe(merged_stats.head()) # Display merged df head for debugging except Exception as e: st.error(f"Errore generico durante il calcolo Z-Score: {e}") with col_sugg: # Suggestions part remains the same, using driver_df calculated in sidebar st.write("**Suggerimenti Basati sui Driver & Punteggi Bassi:**") if not avg_scores_per_category_f.empty: lowest_cat_name = avg_scores_per_category_f.index[0] lowest_cat_score = avg_scores_per_category_f.iloc[0] st.markdown(f"**Area più debole (media bassa):** '{lowest_cat_name}' ({lowest_cat_score:.2f}).") if not driver_df.empty: avg_corr = driver_df['Correlazione'].mean() avg_score_all_q = driver_df['Punteggio Medio'].mean() low_score_threshold = avg_score_all_q high_impact_threshold = avg_corr critical_drivers = driver_df[ (driver_df['Punteggio Medio'] < low_score_threshold) & (driver_df['Correlazione'] > high_impact_threshold) ].sort_values('Correlazione', ascending=False) if not critical_drivers.empty: st.markdown("**Priorità Alte (Bassa Performance, Alto Impatto):**") for _, row in critical_drivers.head(5).iterrows(): st.markdown(f"- *{row['Domanda_Breve']}* (Cat: {row['Categoria']}, Score: {row['Punteggio Medio']:.2f}, Corr: {row['Correlazione']:.2f})") st.warning("Intervenire su queste domande potrebbe avere il maggior impatto positivo sulla soddisfazione generale.") else: st.info("Nessuna domanda trovata nel quadrante 'Priorità Alte' con le soglie attuali.") # Generic suggestions suggestions = { "Stress e benessere": "Considerare iniziative per la gestione dello stress, flessibilità lavorativa, e supporto psicologico.", # ... (rest of suggestions map) ... "Apertura e inclusione": "Programmi D&I, garantire libertà di espressione e sicurezza psicologica." } default_suggestion = "Approfondire le cause specifiche tramite focus group o interviste mirate." 
st.markdown("**Possibili Azioni Generiche per l'Area più Debole:**") st.info(suggestions.get(lowest_cat_name, default_suggestion)) else: st.write("Nessun dato medio per categoria disponibile per generare suggerimenti.") else: st.info("Dati insufficienti per rilevare anomalie o fornire suggerimenti.") # ============================================================================== # --- TAB Advanced: More Complex Visualizations --- # ============================================================================== with tab_advanced: st.header("📈 Grafici Avanzati (Filtrati)") if df_filtered is None or df_filtered.empty: st.warning("Nessun dato disponibile con i filtri selezionati.") elif not numeric_question_cols: st.warning("Nessuna domanda numerica trovata per le analisi avanzate.") else: # Use the melted data prepared in the Comparisons tab if available if 'df_melted_f' not in locals() or df_melted_f.empty: # Try to recreate df_melted_f if not available numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns] valid_demographic_cols = [col for col in demographic_cols if col in df_filtered.columns] if valid_demographic_cols and numeric_cols_in_filtered: df_melted_f = get_melted_data(df_filtered, valid_demographic_cols, numeric_cols_in_filtered, question_to_category_map) else: df_melted_f = pd.DataFrame() if df_melted_f.empty and not numeric_cols_in_filtered: # Check again if still empty or no numerics st.warning("Dati insufficienti per i grafici avanzati.") else: # --- 1. 
Correlation Heatmap --- st.subheader("🔥 Heatmap di Correlazione tra Domande Numeriche") corr_method_options = ['pearson'] if SCIPY_AVAILABLE: corr_method_options.append('spearman') corr_method_adv = st.radio("Metodo Correlazione:", corr_method_options, horizontal=True, key="corr_method_adv") numeric_cols_in_filtered_adv = [col for col in numeric_question_cols if col in df_filtered.columns and df_filtered[col].nunique(dropna=True) > 1] if len(numeric_cols_in_filtered_adv) > 1: # Etichette univoche e leggibili corr_labels = { q: (f"{str(q)[:27]}..." if len(str(q)) > 30 else str(q)) + f" [{i}]" for i, q in enumerate(numeric_cols_in_filtered_adv) } df_corr = df_filtered[numeric_cols_in_filtered_adv].rename(columns=corr_labels) try: corr_matrix = df_corr.corr(method=corr_method_adv) if not corr_matrix.empty: fig_heatmap = px.imshow( corr_matrix, text_auto=".2f", aspect="auto", color_continuous_scale='RdBu_r', range_color=[-1, 1], template=PLOTLY_TEMPLATE, title=f"Heatmap Correlazione ({corr_method_adv.capitalize()}) tra Domande" ) heatmap_height = max(600, len(numeric_cols_in_filtered_adv) * 20) fig_heatmap.update_layout(height=heatmap_height, xaxis_tickangle=-45) st.plotly_chart(fig_heatmap, use_container_width=True) st.caption("Rosso = correlazione negativa, Blu = correlazione positiva.") else: st.warning("Matrice di correlazione vuota.") except Exception as e: st.warning(f"Errore nel calcolo heatmap: {e}") else: st.info("Servono almeno due domande numeriche con varianza per la heatmap.") st.markdown("---") # --- 2. 
Radar Chart --- st.subheader("🕸️ Radar Chart: Confronto Medie Categorie per Gruppo Demografico") if not avg_scores_per_category_f.empty and valid_demographic_cols and not df_melted_f.empty: radar_demo_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1] if radar_demo_options: radar_demo_col = st.selectbox("Seleziona Gruppo Demografico per Confronto Radar:", radar_demo_options, key="radar_demo") available_groups = sorted(df_filtered[radar_demo_col].astype(str).unique()) available_groups = [g for g in available_groups if g != 'Non specificato'] # Exclude 'Non specificato'? if len(available_groups) > 1: groups_to_compare = st.multiselect(f"Seleziona '{radar_demo_col}' da confrontare:", options=available_groups, default=available_groups[:min(len(available_groups), 3)], key="radar_groups") if groups_to_compare: radar_data = df_melted_f[df_melted_f[radar_demo_col].isin(groups_to_compare)] avg_radar = radar_data.groupby(['Categoria', radar_demo_col], observed=True)['Punteggio'].mean().unstack() avg_radar = avg_radar.dropna(axis=0, how='all') # Drop categories with no data if not avg_radar.empty: categories_radar = avg_radar.index.tolist() fig_radar = go.Figure() color_sequence = px.colors.qualitative.Plotly # Use a color sequence for i, group in enumerate(groups_to_compare): if group in avg_radar.columns: fig_radar.add_trace(go.Scatterpolar( r=avg_radar[group].values, theta=categories_radar, fill='toself', name=str(group), line_color=color_sequence[i % len(color_sequence)] # Cycle through colors )) min_scale_radar, max_scale_radar = response_scale if response_scale else (1, 6) fig_radar.update_layout( polar=dict(radialaxis=dict(visible=True, range=[min_scale_radar-0.5, max_scale_radar+0.5])), showlegend=True, title=f"Confronto Medie Categorie Radar per {radar_demo_col}", template=PLOTLY_TEMPLATE ) st.plotly_chart(fig_radar, use_container_width=True) else: st.warning(f"Nessun dato medio disponibile per i gruppi selezionati.") else: 
st.info(f"Seleziona almeno un gruppo.") else: st.info(f"Solo un gruppo disponibile in '{radar_demo_col}'.") else: st.info("Nessuna colonna demografica con valori multipli per il confronto Radar.") else: st.info("Dati insufficienti (medie categorie, demo, melted) per il grafico Radar.") st.markdown("---") # --- 3. Parallel Coordinates Plot --- # (Code for Parallel Coordinates - kept similar, relies on df_melted_f) st.subheader("|| Parrallel Coordinates: Pattern Medie Categorie per Gruppo") st.warning("Attenzione: Questo grafico può essere lento o illeggibile con molti dati/categorie.") if not avg_scores_per_category_f.empty and valid_demographic_cols and not df_melted_f.empty: cats_parallel_options = avg_scores_per_category_f.index.unique().tolist() if cats_parallel_options: default_cats_parallel = cats_parallel_options[:min(len(cats_parallel_options), 8)] cats_parallel = st.multiselect("Seleziona Categorie (Dimensioni):", cats_parallel_options, default=default_cats_parallel, key="par_cats") if cats_parallel: parallel_demo_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1] if parallel_demo_options: parallel_demo_col = st.selectbox("Colora Linee per Gruppo Demografico:", parallel_demo_options, key="par_demo") # Calculate mean scores per selected category and chosen demo group df_parallel_prep = df_melted_f[df_melted_f['Categoria'].isin(cats_parallel)] df_parallel = df_parallel_prep.groupby([parallel_demo_col, 'Categoria'], observed=True)['Punteggio'].mean().unstack() df_parallel = df_parallel.dropna().reset_index() if not df_parallel.empty and parallel_demo_col in df_parallel.columns: # Map group names to numerical values for continuous color scale unique_groups_par = df_parallel[parallel_demo_col].unique() group_map = {name: i for i, name in enumerate(unique_groups_par)} df_parallel['color_val'] = df_parallel[parallel_demo_col].map(group_map) dimensions = [] for cat in cats_parallel: if cat in df_parallel.columns: 
dimensions.append(dict( range = [response_scale[0], response_scale[1]] if response_scale else [1,6], label = str(cat)[:20] + '...' if len(str(cat))>20 else str(cat), values = df_parallel[cat] )) if dimensions: color_palette_par = px.colors.qualitative.Plotly fig_parallel = go.Figure(data= go.Parcoords( line = dict(color = df_parallel['color_val'], colorscale = color_palette_par, # Use qualitative scale directly showscale = False), dimensions = dimensions )) fig_parallel.update_layout( title=f"Medie Categorie per {parallel_demo_col} (Parallel Coordinates)", template=PLOTLY_TEMPLATE) st.plotly_chart(fig_parallel, use_container_width=True) # Manual legend st.write(f"**Legenda Colori ({parallel_demo_col}):**") cols_legend = st.columns(min(len(group_map), 5)) i = 0 for name, num in group_map.items(): color = color_palette_par[num % len(color_palette_par)] with cols_legend[i % min(len(group_map), 5)]: st.markdown(f" {name}", unsafe_allow_html=True) i += 1 else: st.warning("Nessuna dimensione valida per Parallel Coordinates.") else: st.warning(f"Nessun dato medio aggregato per {parallel_demo_col}.") else: st.info("Nessuna colonna demografica con valori multipli per colorare le linee.") else: st.info("Seleziona almeno una categoria (dimensione).") else: st.info("Nessuna categoria disponibile per Parallel Coordinates.") else: st.info("Dati insufficienti (medie categorie, demo, melted) per Parallel Coordinates.") st.markdown("---") # --- 4. 
Stacked Area Chart --- # (Code for Stacked Area Chart - kept similar, relies on df_melted_f) st.subheader("📊 Stacked Area Chart: Distribuzione Risposte per Categoria su Gruppo Ordinato") if not df_melted_f.empty and valid_demographic_cols: ordered_demo_options = [col for col in valid_demographic_cols if 'Eta' in col or 'Anzianita' in col] if not ordered_demo_options: ordered_demo_options = valid_demographic_cols # Fallback if ordered_demo_options: area_demo_col = st.selectbox("Seleziona Gruppo Demografico Ordinato:", ordered_demo_options, key="area_demo") area_cat_options = avg_scores_per_category_f.index.unique().tolist() if area_cat_options: area_category = st.selectbox("Seleziona Categoria:", area_cat_options, key="area_cat") df_area_prep = df_melted_f[(df_melted_f['Categoria'] == area_category) & df_melted_f[area_demo_col].notna()].copy() if not df_area_prep.empty: df_area_prep['Sentiment'] = df_area_prep['Punteggio'].apply(categorize_score) df_area = df_area_prep.groupby([area_demo_col, 'Sentiment'], observed=True).size().reset_index(name='Conteggio') df_area['Percentuale'] = df_area.groupby(area_demo_col)['Conteggio'].transform(lambda x: x / float(x.sum()) * 100 if x.sum() > 0 else 0) category_orders = {} group_order = None if 'Eta' in area_demo_col: age_order_guess = ['Fino a 30 anni', '31-40 anni', '41-50 anni', 'Oltre i 50 anni', 'Non specificato'] actual_groups = df_area[area_demo_col].unique() group_order = [g for g in age_order_guess if g in actual_groups] group_order.extend(sorted([g for g in actual_groups if g not in age_order_guess])) category_orders={area_demo_col: group_order} # Ensure Sentiment order for stacking sentiment_order = ["Critico", "Neutrale", "Positivo", "Non Risposto"] category_orders['Sentiment'] = [s for s in sentiment_order if s in df_area['Sentiment'].unique()] plot_colors = BUCKET_COLORS.copy() plot_colors["Non Risposto"] = "#bbbbbb" if not df_area.empty: fig_area = px.area(df_area, x=area_demo_col, y='Percentuale', 
color='Sentiment', title=f"Distribuzione Sentiment (%) per '{area_category}' per {area_demo_col}", labels={'Percentuale': '% Rispondenti'}, category_orders=category_orders, color_discrete_map=plot_colors, template=PLOTLY_TEMPLATE) fig_area.update_layout(yaxis_range=[0, 100], yaxis_ticksuffix="%") st.plotly_chart(fig_area, use_container_width=True) else: st.warning("Nessun dato aggregato per l'Area Chart.") else: st.warning(f"Nessun dato trovato per la categoria '{area_category}'.") else: st.info("Nessuna categoria valida trovata.") else: st.info("Nessuna colonna demografica disponibile per l'Area Chart.") else: st.info("Dati insufficienti (melted, demo) per l'Area Chart.") # --- Download Button --- st.sidebar.divider() st.sidebar.subheader("📥 Download Dati Filtrati") if df_filtered is not None and not df_filtered.empty: output = BytesIO() try: df_to_download = df_filtered.copy() df_to_download.to_csv(output, index=False, encoding='utf-8', sep=';') output.seek(0) st.sidebar.download_button(label="Scarica Dati Filtrati Correnti (CSV)", data=output, file_name='dati_sondaggio_filtrati_avanzato.csv', mime='text/csv', key='download_csv') except Exception as e: st.sidebar.error(f"Errore durante la creazione del CSV: {e}") else: st.sidebar.info("Nessun dato filtrato da scaricare.") # --- Footer --- st.markdown("---") # Use a dynamic timestamp try: current_time_str = pd.Timestamp.now(tz='Europe/Rome').strftime('%Y-%m-%d %H:%M:%S %Z') except Exception: # Fallback if timezone fails current_time_str = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') st.caption(f"Dashboard Analisi Clima") # Altrimenti (se uploaded_file is None), non mostra nulla tranne l'uploader else: st.title("🚀 Dashboard Analisi Clima") st.info("Per iniziare, carica un file CSV usando il widget qui sopra.")