# ui/callbacks.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: This module now contains only the CORE LOGIC for the Gradio
#              event handlers. It exports these functions to be attached to
#              listeners within the main application context.

import logging
from threading import Thread

import gradio as gr
import pandas as pd

from core.analyzer import DataAnalyzer
from core.llm import GeminiNarrativeGenerator
from core.config import settings
from core.exceptions import DataProcessingError
from modules.clustering import perform_clustering
# ... other module imports


# --- Main Analysis Logic ---

def run_full_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
    """
    The primary orchestration function. This is the logic that will be called
    by the 'analyze_button.click' event.
    """
    # 1. Input Validation (File)
    if file_obj is None:
        raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")

    # 2. Runtime Configuration Validation (API Key)
    progress(0, desc="Validating configuration...")
    if not settings.GOOGLE_API_KEY:
        logging.error("Analysis attempted without GOOGLE_API_KEY set.")
        raise gr.Error(
            "CRITICAL: GOOGLE_API_KEY is not configured. "
            "Please add it to your .env file or as a platform secret and restart."
        )

    try:
        # 3. Data Loading & Core Analysis
        progress(0.1, desc="Loading and parsing data...")
        df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
        # Cap the row count so the UI stays responsive on large uploads.
        if len(df) > settings.MAX_UI_ROWS:
            df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)

        progress(0.3, desc="Instantiating analysis engine...")
        analyzer = DataAnalyzer(df)
        # Return only the analyzer; the rest is handled in a subsequent step.
        return analyzer
    except DataProcessingError as e:
        logging.error(f"User-facing data processing error: {e}", exc_info=True)
        raise gr.Error(str(e))
    except Exception as e:
        logging.error(f"A critical unhandled error occurred: {e}", exc_info=True)
        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {e}")


def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
    """
    A generator function that yields UI updates. Triggered after the analyzer
    is created.
    """
    if not analyzer:
        # The initial analysis failed, so there is nothing to report. Yield an
        # empty dictionary matching the structure of the updates below so
        # Gradio has something to unpack, then stop. (A plain `return value`
        # inside a generator is swallowed by StopIteration and never reaches
        # Gradio.)
        yield {"state_analyzer": None}
        return

    # 1. Start the AI report in a background thread so the rest of the
    #    dashboard can render while the narrative is generated. The
    #    single-element list is a mutable container the worker thread can
    #    write its result into.
    progress(0.1, desc="Spawning AI report thread...")
    ai_report_queue = [""]

    def generate_ai_report_threaded(analyzer_instance):
        narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
        ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)

    thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
    thread.start()

    # 2. Generate standard reports
    progress(0.4, desc="Generating data profiles...")
    meta = analyzer.metadata
    missing_df, num_df, cat_df = analyzer.get_profiling_reports()
    fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()

    # 3. Yield initial updates
    progress(0.8, desc="Building initial dashboard...")
    initial_updates = {
        "ai_report_output": gr.update(value="⏳ Generating AI report... Main dashboard is ready."),
        "profile_missing_df": gr.update(value=missing_df),
        "profile_numeric_df": gr.update(value=num_df),
        "profile_categorical_df": gr.update(value=cat_df),
        "plot_types": gr.update(value=fig_types),
        "plot_missing": gr.update(value=fig_missing),
        "plot_correlation": gr.update(value=fig_corr),
        "dd_hist_col": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
        "dd_scatter_x": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
        "dd_scatter_y": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),
        "dd_scatter_color": gr.update(choices=meta['columns']),
        # Only show the tabs whose analyses apply to this dataset.
        "tab_timeseries": gr.update(visible=bool(meta['datetime_cols'])),
        "tab_text": gr.update(visible=bool(meta['text_cols'])),
        "tab_cluster": gr.update(visible=len(meta['numeric_cols']) > 1),
    }
    yield initial_updates

    # 4. Wait for the thread and yield the final AI report
    thread.join()
    progress(1.0, desc="AI Report complete!")
    final_updates = initial_updates.copy()
    final_updates["ai_report_output"] = gr.update(value=ai_report_queue[0])
    yield final_updates
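
# Note: Gradio expects dictionary-style outputs to be keyed by component
# instances, not strings. The string keys above are therefore assumed to be
# resolved by the main application before the listener is registered. A
# minimal sketch of such an adapter (`COMPONENTS` is a hypothetical
# name-to-component map defined in the main app):
#
#   def resolve(updates: dict) -> dict:
#       return {COMPONENTS[name]: update for name, update in updates.items()}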


# --- Other Interactive Callback Logic ---

def update_clustering(analyzer, k):
    """Re-runs the clustering module for a new cluster count `k`."""
    if not analyzer:
        return gr.update(), gr.update(), gr.update()
    fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
    return fig_cluster, fig_elbow, summary
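
# Illustrative wiring for the clustering callback; `slider_k` and the three
# output components are assumed names from the main application layout:
#
#   slider_k.change(
#       fn=update_clustering,
#       inputs=[state_analyzer, slider_k],
#       outputs=[plot_cluster, plot_elbow, md_cluster_summary],
#   )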