Spaces:
Sleeping
Sleeping
File size: 5,807 Bytes
d1943e0 12fa967 1dae368 12fa967 a00699a 1dae368 12fa967 d1943e0 7d40c30 fe02df7 d1943e0 12fa967 d1943e0 1dae368 7d40c30 1dae368 7d40c30 1dae368 fcc261b 1dae368 fcc261b fe02df7 1dae368 fe02df7 7d40c30 fcc261b 1dae368 fcc261b 7d40c30 a00699a 7d40c30 fe02df7 1dae368 fcc261b fe02df7 fcc261b a00699a 1dae368 fe02df7 1dae368 a00699a fcc261b a00699a fcc261b 1dae368 7d40c30 1dae368 fe02df7 7d40c30 1dae368 fe02df7 7d40c30 1dae368 fcc261b 1dae368 7d40c30 1dae368 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# ui/callbacks.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.7 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: This module contains the core logic for all Gradio event handlers.
# The clustering callback is now updated to include persona profiling.
import gradio as gr
import pandas as pd
import logging
from threading import Thread
import plotly.graph_objects as go
import plotly.express as px
from core.analyzer import DataAnalyzer, engineer_features
from core.llm import GeminiNarrativeGenerator
from core.config import settings
from core.exceptions import DataProcessingError
from modules.clustering import perform_clustering
# --- NEW IMPORT ---
from modules.profiling import profile_clusters
# --- Primary Analysis Chain (Unchanged) ---
def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
    """Load an uploaded file, engineer features and build the analysis engine.

    Args:
        file_obj: Uploaded file object; its ``name`` attribute is used as the
            path handed to pandas.
        progress: Gradio progress tracker, updated at each stage.

    Returns:
        A ``DataAnalyzer`` constructed from the feature-engineered frame.

    Raises:
        gr.Error: If no file was supplied, if ``settings.GOOGLE_API_KEY`` is
            falsy, or if loading / feature engineering / analyzer
            construction raises.
    """
    if file_obj is None:
        raise gr.Error("No file uploaded.")

    progress(0, desc="Validating configuration...")
    if not settings.GOOGLE_API_KEY:
        raise gr.Error("CRITICAL: GOOGLE_API_KEY is not configured.")

    try:
        progress(0.1, desc="Loading raw data...")
        path = file_obj.name
        # Compare the extension case-insensitively so that e.g. "DATA.CSV"
        # is parsed as CSV instead of being handed to the Excel reader.
        if path.lower().endswith('.csv'):
            df_raw = pd.read_csv(path)
        else:
            df_raw = pd.read_excel(path)

        # Cap the number of rows kept for the UI; the fixed seed keeps the
        # sample reproducible between runs.
        if len(df_raw) > settings.MAX_UI_ROWS:
            df_raw = df_raw.sample(n=settings.MAX_UI_ROWS, random_state=42)

        progress(0.5, desc="Applying strategic feature engineering...")
        df_engineered = engineer_features(df_raw)

        progress(0.8, desc="Instantiating analysis engine...")
        analyzer = DataAnalyzer(df_engineered)

        progress(1.0, desc="Analysis complete. Generating reports...")
        return analyzer
    except Exception as e:
        logging.error(f"Error in initial analysis: {e}", exc_info=True)
        # Chain the original exception so the root cause stays in tracebacks.
        raise gr.Error(f"Analysis Failed: {str(e)}") from e
def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
    """Stream profiling reports, overview figures and the AI narrative.

    This is a generator: it first yields a 14-tuple of component updates with
    a placeholder for the AI report, then, once the background thread has
    finished, yields the same tuple with the first element replaced by the
    generated narrative.

    Args:
        analyzer: Expected to be a ``DataAnalyzer``; anything else produces a
            single tuple of 14 ``None`` values.
        progress: Gradio progress tracker.

    Yields:
        Tuples of 14 values, one per output component.
    """
    if not isinstance(analyzer, DataAnalyzer):
        yield (None,) * 14
        return

    progress(0, desc="Spawning AI report thread...")
    # One-element list used as a mutable slot the worker thread writes into.
    ai_report_queue = [""]

    def generate_ai_report_threaded(a):
        # An uncaught exception in the worker would leave the slot empty and
        # the UI would silently show a blank report, so failures are logged
        # and surfaced as the report text instead.
        try:
            narrative_generator = GeminiNarrativeGenerator(settings.GOOGLE_API_KEY)
            ai_report_queue[0] = narrative_generator.generate_narrative(a)
        except Exception as e:
            logging.error(f"Error generating AI report: {e}", exc_info=True)
            ai_report_queue[0] = f"❌ AI report generation failed: {e}"

    thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
    thread.start()

    progress(0.4, desc="Generating reports and visuals...")
    meta = analyzer.metadata
    numeric_cols = meta['numeric_cols']
    missing_df, num_df, cat_df = analyzer.get_profiling_reports()
    fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()

    # Default selections for the three numeric dropdowns: the first numeric
    # column (twice) and the second one, when they exist.
    first_numeric = numeric_cols[0] if numeric_cols else None
    second_numeric = numeric_cols[1] if len(numeric_cols) > 1 else None

    initial_updates = (
        gr.update(value="⏳ Generating AI report..."),
        gr.update(value=missing_df),
        gr.update(value=num_df),
        gr.update(value=cat_df),
        gr.update(value=fig_types),
        gr.update(value=fig_missing),
        gr.update(value=fig_corr),
        gr.update(choices=numeric_cols, value=first_numeric),
        gr.update(choices=numeric_cols, value=first_numeric),
        gr.update(choices=numeric_cols, value=second_numeric),
        gr.update(choices=meta['columns']),
        gr.update(visible=bool(meta['datetime_cols'])),
        gr.update(visible=bool(meta['text_cols'])),
        gr.update(visible=len(numeric_cols) > 1),
    )
    yield initial_updates

    # Block until the narrative (or its failure message) is available.
    thread.join()
    progress(1.0, desc="AI Report complete!")
    final_updates_list = list(initial_updates)
    final_updates_list[0] = gr.update(value=ai_report_queue[0])
    yield tuple(final_updates_list)
# --- Interactive Explorer Callbacks (Unchanged) ---
def create_histogram(analyzer, col):
    """Return a histogram (with a box-plot marginal) for ``col``.

    An empty figure is returned when ``analyzer`` is not a ``DataAnalyzer``
    or when ``col`` is falsy.
    """
    inputs_ok = isinstance(analyzer, DataAnalyzer) and bool(col)
    if not inputs_ok:
        return go.Figure()

    chart_title = f"<b>Distribution of {col}</b>"
    return px.histogram(
        analyzer.df,
        x=col,
        title=chart_title,
        marginal="box",
    )
def create_scatterplot(analyzer, x_col, y_col, color_col):
    """Return a scatter plot of ``y_col`` against ``x_col``.

    Args:
        analyzer: Expected to be a ``DataAnalyzer``; its ``df`` is plotted.
        x_col: Column for the x axis.
        y_col: Column for the y axis.
        color_col: Optional column used to color the points; a falsy value
            disables coloring.

    Returns:
        A Plotly figure. An empty figure is returned when ``analyzer`` is not
        a ``DataAnalyzer`` or when either axis column is falsy.
    """
    if not isinstance(analyzer, DataAnalyzer) or not x_col or not y_col:
        return go.Figure()

    max_points = 10000
    df_plot = analyzer.df
    # Down-sample only when the frame exceeds the cap (sampling a smaller
    # frame would merely shuffle it), and seed the sample so the same inputs
    # always render the same plot, as the initial row sampling already does.
    if len(df_plot) > max_points:
        df_plot = df_plot.sample(n=max_points, random_state=42)

    return px.scatter(df_plot, x=x_col, y=y_col, color=color_col if color_col else None)
# --- MODIFIED CLUSTERING CALLBACK ---
def update_clustering(analyzer, k):
    """Run clustering and persona profiling for the clustering tab.

    Workflow:
      1. Call ``perform_clustering`` on the analyzer's numeric columns.
      2. If labels came back, call ``profile_clusters`` on a fixed set of
         candidate columns, restricted to those present in the frame.
      3. Return five values for the UI: cluster figure, elbow figure,
         summary, persona markdown and profile figure.
    """
    if not isinstance(analyzer, DataAnalyzer):
        # Nothing to cluster: hand back blanks for all five outputs.
        return go.Figure(), go.Figure(), "", "", go.Figure()

    # --- Clustering: figures, textual summary and per-row labels ---
    clustering_result = perform_clustering(
        analyzer.df, analyzer.metadata['numeric_cols'], k
    )
    fig_cluster, fig_elbow, summary, cluster_labels = clustering_result

    if cluster_labels.empty:
        # No labels were produced, so there is nothing to profile.
        failure_note = "Clustering failed. No personas to profile."
        return fig_cluster, fig_elbow, summary, failure_note, go.Figure()

    # --- Profiling: keep only candidate columns that exist in the frame ---
    numeric_to_profile = []
    for candidate in ('Total_Revenue', 'Quantity_Ordered', 'Hour'):
        if candidate in analyzer.df.columns:
            numeric_to_profile.append(candidate)

    cats_to_profile = []
    for candidate in ('City', 'Product', 'Day_of_Week'):
        if candidate in analyzer.df.columns:
            cats_to_profile.append(candidate)

    md_personas, fig_profile = profile_clusters(
        analyzer.df, cluster_labels, numeric_to_profile, cats_to_profile
    )

    # Order matches the five clustering output components.
    return fig_cluster, fig_elbow, summary, md_personas, fig_profile