# ui/callbacks.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.5 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: This module contains the core logic for all Gradio event handlers.
# It is designed to be completely decoupled from the UI definition.
# Functions here return values in a specific order (often as tuples)
# that correspond to a list of output components defined in app.py.

import logging
from threading import Thread

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from core.analyzer import DataAnalyzer
from core.config import settings
from core.exceptions import DataProcessingError
from core.llm import GeminiNarrativeGenerator
from modules.clustering import perform_clustering
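
# NOTE: Illustrative sketch only, not part of the original module. Based on the
# description above, app.py is assumed to wire the two analysis phases roughly
# as follows; the component names (`upload_button`, `analyzer_state`,
# `main_outputs`) are hypothetical placeholders:
#
#     upload_button.upload(
#         fn=run_initial_analysis,
#         inputs=[upload_button],
#         outputs=[analyzer_state],      # Phase 1 populates the gr.State
#     )
#     analyzer_state.change(
#         fn=generate_reports_and_visuals,
#         inputs=[analyzer_state],
#         outputs=main_outputs,          # 14 components, in the order yielded below
#     )
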
# --- Primary Analysis Chain ---
def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
    """
    Phase 1: Fast, synchronous tasks.

    Validates inputs, loads data, and creates the core DataAnalyzer object. This
    function updates the gr.State object, which then triggers the next phase.

    Args:
        file_obj: The uploaded file object from Gradio.
        progress: The Gradio progress tracker.

    Returns:
        The instantiated DataAnalyzer object; raises gr.Error if validation or
        processing fails.
    """
    if file_obj is None:
        raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")

    progress(0, desc="Validating configuration...")
    if not settings.GOOGLE_API_KEY:
        logging.error("Analysis attempted without GOOGLE_API_KEY set.")
        raise gr.Error("CRITICAL: GOOGLE_API_KEY is not configured. Please add it as a secret.")

    try:
        progress(0.2, desc="Loading and parsing data file...")
        df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)

        if len(df) > settings.MAX_UI_ROWS:
            df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
            logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")

        progress(0.7, desc="Instantiating analysis engine...")
        analyzer = DataAnalyzer(df)
        progress(1.0, desc="Initial analysis complete. Generating reports...")
        return analyzer
    except Exception as e:
        logging.error(f"A critical error occurred during initial analysis: {e}", exc_info=True)
        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")

def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
    """
    Phase 2: Slower, multi-stage report and visual generation.

    This generator function yields tuples of UI updates. The order of the yielded
    tuple is CRITICAL and must exactly match the `main_outputs` list in `app.py`.

    Args:
        analyzer: The DataAnalyzer object from the gr.State.
        progress: The Gradio progress tracker.

    Yields:
        A tuple of gr.update() objects to populate the dashboard.
    """
    if not isinstance(analyzer, DataAnalyzer):
        logging.warning("generate_reports_and_visuals called without a valid analyzer. Clearing UI.")
        # Yield a tuple of Nones matching the output length to clear/reset the UI.
        # There are 14 components in the `main_outputs` list in app.py.
        yield (None,) * 14
        return

    # 1. Start AI narrative generation in a background thread.
    progress(0, desc="Spawning AI report thread...")
    ai_report_queue = [""]  # Mutable list so the worker thread can pass the string back.

    def generate_ai_report_threaded(analyzer_instance):
        narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
        ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)

    thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
    thread.start()

    # 2. Generate standard reports and visuals.
    progress(0.4, desc="Generating data profiles and visuals...")
    meta = analyzer.metadata
    missing_df, num_df, cat_df = analyzer.get_profiling_reports()
    fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()

    # 3. Yield the first set of updates to populate the main dashboard immediately.
    #    The order of this tuple MUST match the `main_outputs` list in `app.py`.
    initial_updates = (
        gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),  # 0: ai_report_output
        gr.update(value=missing_df),    # 1: profile_missing_df
        gr.update(value=num_df),        # 2: profile_numeric_df
        gr.update(value=cat_df),        # 3: profile_categorical_df
        gr.update(value=fig_types),     # 4: plot_types
        gr.update(value=fig_missing),   # 5: plot_missing
        gr.update(value=fig_corr),      # 6: plot_correlation
        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 7: dd_hist_col
        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 8: dd_scatter_x
        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),  # 9: dd_scatter_y
        gr.update(choices=meta['columns']),              # 10: dd_scatter_color
        gr.update(visible=bool(meta['datetime_cols'])),  # 11: tab_timeseries
        gr.update(visible=bool(meta['text_cols'])),      # 12: tab_text
        gr.update(visible=len(meta['numeric_cols']) > 1),  # 13: tab_cluster
    )
    yield initial_updates

    # 4. Wait for the AI thread to complete.
    thread.join()
    progress(1.0, desc="AI Report complete!")

    # 5. Yield the final update: copy the initial tuple, swap in the finished
    #    AI report, and yield the result.
    final_updates_list = list(initial_updates)
    final_updates_list[0] = gr.update(value=ai_report_queue[0])
    yield tuple(final_updates_list)
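
# NOTE: Illustrative sketch only, not part of the original module. From the index
# comments above, the `main_outputs` list in app.py is assumed to be defined in
# exactly this order (the component variables themselves live in app.py):
#
#     main_outputs = [
#         ai_report_output,                                                 # 0
#         profile_missing_df, profile_numeric_df, profile_categorical_df,  # 1-3
#         plot_types, plot_missing, plot_correlation,                      # 4-6
#         dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color,       # 7-10
#         tab_timeseries, tab_text, tab_cluster,                           # 11-13
#     ]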

# --- Interactive Explorer Callbacks ---
def create_histogram(analyzer, col):
    """Generates a histogram for a selected numeric column."""
    if not isinstance(analyzer, DataAnalyzer) or not col:
        return go.Figure().update_layout(title="Select a column to generate a histogram")
    return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")


def create_scatterplot(analyzer, x_col, y_col, color_col):
    """Generates a scatter plot for selected X, Y, and optional color columns."""
    if not isinstance(analyzer, DataAnalyzer) or not x_col or not y_col:
        return go.Figure().update_layout(title="Select X and Y axes to generate a scatter plot")

    # Use a subset for performance on large datasets.
    df_sample = analyzer.df
    if len(analyzer.df) > 10000:
        df_sample = analyzer.df.sample(n=10000, random_state=42)

    return px.scatter(
        df_sample, x=x_col, y=y_col, color=color_col if color_col else None,
        title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white"
    )

# --- Specialized Module Callbacks ---
def update_clustering(analyzer, k):
    """Callback for the clustering module. Returns a tuple of three updates."""
    if not isinstance(analyzer, DataAnalyzer):
        return gr.update(), gr.update(), gr.update(value="Run analysis first.")

    # Delegate the heavy lifting to the specialized module.
    fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
    return fig_cluster, fig_elbow, summary
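
# NOTE: Illustrative sketch only, not part of the original module. The explorer
# and clustering callbacks above are assumed to be bound to component events in
# app.py along these lines; all component names are hypothetical except the
# dropdowns referenced in the update comments of generate_reports_and_visuals:
#
#     dd_hist_col.change(create_histogram, [analyzer_state, dd_hist_col], plot_histogram)
#     dd_scatter_x.change(
#         create_scatterplot,
#         [analyzer_state, dd_scatter_x, dd_scatter_y, dd_scatter_color],
#         plot_scatter,
#     )
#     num_clusters.change(update_clustering, [analyzer_state, num_clusters],
#                         [plot_cluster, plot_elbow, md_cluster_summary])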