Spaces:
Sleeping
Sleeping
Update ui/callbacks.py
Browse files- ui/callbacks.py +47 -70
ui/callbacks.py
CHANGED
@@ -2,12 +2,12 @@
|
|
2 |
|
3 |
# -*- coding: utf-8 -*-
|
4 |
#
|
5 |
-
# PROJECT: CognitiveEDA v5.
|
6 |
#
|
7 |
-
# DESCRIPTION:
|
8 |
-
#
|
9 |
-
#
|
10 |
-
#
|
11 |
|
12 |
import gradio as gr
|
13 |
import pandas as pd
|
@@ -22,76 +22,66 @@ from core.llm import GeminiNarrativeGenerator
|
|
22 |
from core.config import settings
|
23 |
from core.exceptions import DataProcessingError
|
24 |
from modules.clustering import perform_clustering
|
25 |
-
from modules.text import generate_word_cloud
|
26 |
-
from modules.timeseries import analyze_time_series
|
27 |
|
28 |
|
29 |
# --- Primary Analysis Chain ---
|
30 |
|
31 |
def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
|
32 |
"""
|
33 |
-
Phase 1
|
34 |
-
Validates inputs, loads data, and creates the core DataAnalyzer object.
|
|
|
35 |
|
36 |
Args:
|
37 |
file_obj: The uploaded file object from Gradio.
|
38 |
progress: The Gradio progress tracker.
|
39 |
|
40 |
Returns:
|
41 |
-
The instantiated DataAnalyzer object,
|
42 |
-
Returns None if any validation or processing fails.
|
43 |
"""
|
44 |
-
# 1. Input Validation
|
45 |
if file_obj is None:
|
46 |
raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")
|
47 |
|
48 |
-
# 2. Runtime Configuration Validation
|
49 |
progress(0, desc="Validating configuration...")
|
50 |
if not settings.GOOGLE_API_KEY:
|
51 |
logging.error("Analysis attempted without GOOGLE_API_KEY set.")
|
52 |
-
raise gr.Error(
|
53 |
-
"CRITICAL: GOOGLE_API_KEY is not configured. "
|
54 |
-
"Please add it to your .env file or as a platform secret and restart."
|
55 |
-
)
|
56 |
|
57 |
try:
|
58 |
-
# 3. Data Loading
|
59 |
progress(0.2, desc="Loading and parsing data file...")
|
60 |
df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
|
61 |
if len(df) > settings.MAX_UI_ROWS:
|
62 |
df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
|
63 |
logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")
|
64 |
|
65 |
-
# 4. Core Analyzer Instantiation
|
66 |
progress(0.7, desc="Instantiating analysis engine...")
|
67 |
analyzer = DataAnalyzer(df)
|
68 |
-
progress(1.0, desc="Initial analysis complete.")
|
69 |
return analyzer
|
70 |
-
|
71 |
-
except DataProcessingError as e:
|
72 |
-
logging.error(f"User-facing data processing error: {e}", exc_info=True)
|
73 |
-
raise gr.Error(str(e))
|
74 |
except Exception as e:
|
75 |
-
logging.error(f"A critical
|
76 |
raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")
|
77 |
|
78 |
|
79 |
def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
|
80 |
"""
|
81 |
-
Phase 2
|
82 |
-
This generator function yields UI updates
|
|
|
83 |
|
84 |
Args:
|
85 |
analyzer: The DataAnalyzer object from the gr.State.
|
86 |
progress: The Gradio progress tracker.
|
87 |
|
88 |
Yields:
|
89 |
-
A
|
90 |
"""
|
91 |
-
# Guard clause: Do nothing if the initial analysis failed.
|
92 |
if not isinstance(analyzer, DataAnalyzer):
|
93 |
-
logging.warning("generate_reports_and_visuals called without a valid analyzer.
|
94 |
-
|
|
|
|
|
|
|
95 |
|
96 |
# 1. Start AI narrative generation in a background thread
|
97 |
progress(0, desc="Spawning AI report thread...")
|
@@ -103,40 +93,41 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
|
|
103 |
thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
|
104 |
thread.start()
|
105 |
|
106 |
-
# 2. Generate standard reports and visuals
|
107 |
progress(0.4, desc="Generating data profiles and visuals...")
|
108 |
meta = analyzer.metadata
|
109 |
missing_df, num_df, cat_df = analyzer.get_profiling_reports()
|
110 |
fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
|
111 |
|
112 |
-
# 3. Yield the first set of updates to populate the main dashboard immediately
|
113 |
-
|
114 |
-
initial_updates =
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
yield initial_updates
|
131 |
|
132 |
# 4. Wait for the AI thread to complete
|
133 |
thread.join()
|
134 |
progress(1.0, desc="AI Report complete!")
|
135 |
|
136 |
-
# 5. Yield the final update
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
140 |
|
141 |
|
142 |
# --- Interactive Explorer Callbacks ---
|
@@ -166,24 +157,10 @@ def create_scatterplot(analyzer, x_col, y_col, color_col):
|
|
166 |
# --- Specialized Module Callbacks ---
|
167 |
|
168 |
def update_clustering(analyzer, k):
    """
    Callback for the clustering module.

    Args:
        analyzer: The DataAnalyzer object held by the UI; anything else is
            treated as "analysis has not been run yet".
        k: The cluster count passed straight through to perform_clustering.

    Returns:
        A 3-tuple. With a valid analyzer it is the cluster figure, the elbow
        figure and the summary from perform_clustering; otherwise two no-op
        gr.update() objects and an update carrying a "Run analysis first."
        message.
    """
    if isinstance(analyzer, DataAnalyzer):
        # Delegate the heavy lifting to the specialized module
        numeric_columns = analyzer.metadata['numeric_cols']
        cluster_plot, elbow_plot, cluster_summary = perform_clustering(analyzer.df, numeric_columns, k)
        return cluster_plot, elbow_plot, cluster_summary

    return gr.update(), gr.update(), gr.update(value="Run analysis first.")
|
176 |
-
|
177 |
-
# Add other specialized callbacks for text and time-series here if needed.
|
178 |
-
# For example, if you add the dropdowns and plots to the layout:
|
179 |
-
#
|
180 |
-
# def update_timeseries(analyzer, date_col, value_col):
|
181 |
-
# if not isinstance(analyzer, DataAnalyzer):
|
182 |
-
# return gr.update(), gr.update(value="Run analysis first.")
|
183 |
-
# fig, md = analyze_time_series(analyzer.df, date_col, value_col)
|
184 |
-
# return fig, md
|
185 |
-
#
|
186 |
-
# def update_text(analyzer, text_col):
|
187 |
-
# if not isinstance(analyzer, DataAnalyzer):
|
188 |
-
# return gr.update()
|
189 |
-
# return generate_word_cloud(analyzer.df, text_col)
|
|
|
2 |
|
3 |
# -*- coding: utf-8 -*-
|
4 |
#
|
5 |
+
# PROJECT: CognitiveEDA v5.5 - The QuantumLeap Intelligence Platform
|
6 |
#
|
7 |
+
# DESCRIPTION: This module contains the core logic for all Gradio event handlers.
|
8 |
+
# It is designed to be completely decoupled from the UI definition.
|
9 |
+
# Functions here return values in a specific order (often as tuples)
|
10 |
+
# that correspond to a list of output components defined in app.py.
|
11 |
|
12 |
import gradio as gr
|
13 |
import pandas as pd
|
|
|
22 |
from core.config import settings
|
23 |
from core.exceptions import DataProcessingError
|
24 |
from modules.clustering import perform_clustering
|
|
|
|
|
25 |
|
26 |
|
27 |
# --- Primary Analysis Chain ---
|
28 |
|
29 |
def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
    """
    Phase 1: Fast, synchronous tasks.

    Validates inputs, loads the uploaded file into a DataFrame (down-sampling
    it to settings.MAX_UI_ROWS rows when it is larger), and creates the core
    DataAnalyzer object.

    NOTE(review): the returned analyzer is presumably stored in a gr.State
    whose change triggers the next phase -- that wiring lives in app.py and is
    not visible from this module; confirm there.

    Args:
        file_obj: The uploaded file object from Gradio. Its ``name`` attribute
            is used as the path handed to pandas.
        progress: The Gradio progress tracker.

    Returns:
        The instantiated DataAnalyzer object.

    Raises:
        gr.Error: If no file was uploaded, if GOOGLE_API_KEY is not configured,
            or if loading the data / building the analyzer fails for any reason.
            This function never returns None; every failure path raises.
    """
    if file_obj is None:
        raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")

    progress(0, desc="Validating configuration...")
    if not settings.GOOGLE_API_KEY:
        logging.error("Analysis attempted without GOOGLE_API_KEY set.")
        raise gr.Error("CRITICAL: GOOGLE_API_KEY is not configured. Please add it as a secret.")

    try:
        progress(0.2, desc="Loading and parsing data file...")
        # Compare the extension case-insensitively so an upload named
        # "DATA.CSV" is not handed to the Excel reader by mistake.
        if file_obj.name.lower().endswith('.csv'):
            df = pd.read_csv(file_obj.name)
        else:
            df = pd.read_excel(file_obj.name)

        if len(df) > settings.MAX_UI_ROWS:
            # Fixed random_state keeps the sample reproducible across runs.
            df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
            logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")

        progress(0.7, desc="Instantiating analysis engine...")
        analyzer = DataAnalyzer(df)
        progress(1.0, desc="Initial analysis complete. Generating reports...")
        return analyzer
    except Exception as e:
        # Top-level UI boundary: log the full traceback, then surface a
        # user-facing error while keeping the original exception as the cause.
        logging.error(f"A critical error occurred during initial analysis: {e}", exc_info=True)
        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}") from e
|
64 |
|
65 |
|
66 |
def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
|
67 |
"""
|
68 |
+
Phase 2: Slower, multi-stage report and visual generation.
|
69 |
+
This generator function yields tuples of UI updates. The order of the yielded
|
70 |
+
tuple is CRITICAL and must exactly match the `main_outputs` list in `app.py`.
|
71 |
|
72 |
Args:
|
73 |
analyzer: The DataAnalyzer object from the gr.State.
|
74 |
progress: The Gradio progress tracker.
|
75 |
|
76 |
Yields:
|
77 |
+
A tuple of gr.update() objects to populate the dashboard.
|
78 |
"""
|
|
|
79 |
if not isinstance(analyzer, DataAnalyzer):
|
80 |
+
logging.warning("generate_reports_and_visuals called without a valid analyzer. Clearing UI.")
|
81 |
+
# Return a tuple of Nones matching the output length to clear/reset the UI.
|
82 |
+
# There are 14 components in the `main_outputs` list in app.py.
|
83 |
+
yield (None,) * 14
|
84 |
+
return
|
85 |
|
86 |
# 1. Start AI narrative generation in a background thread
|
87 |
progress(0, desc="Spawning AI report thread...")
|
|
|
93 |
thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
|
94 |
thread.start()
|
95 |
|
96 |
+
# 2. Generate standard reports and visuals
|
97 |
progress(0.4, desc="Generating data profiles and visuals...")
|
98 |
meta = analyzer.metadata
|
99 |
missing_df, num_df, cat_df = analyzer.get_profiling_reports()
|
100 |
fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
|
101 |
|
102 |
+
# 3. Yield the first set of updates to populate the main dashboard immediately.
|
103 |
+
# The order of this tuple MUST match the `main_outputs` list in `app.py`.
|
104 |
+
initial_updates = (
|
105 |
+
gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."), # 0: ai_report_output
|
106 |
+
gr.update(value=missing_df), # 1: profile_missing_df
|
107 |
+
gr.update(value=num_df), # 2: profile_numeric_df
|
108 |
+
gr.update(value=cat_df), # 3: profile_categorical_df
|
109 |
+
gr.update(value=fig_types), # 4: plot_types
|
110 |
+
gr.update(value=fig_missing), # 5: plot_missing
|
111 |
+
gr.update(value=fig_corr), # 6: plot_correlation
|
112 |
+
gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None), # 7: dd_hist_col
|
113 |
+
gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None), # 8: dd_scatter_x
|
114 |
+
gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None), # 9: dd_scatter_y
|
115 |
+
gr.update(choices=meta['columns']), # 10: dd_scatter_color
|
116 |
+
gr.update(visible=bool(meta['datetime_cols'])), # 11: tab_timeseries
|
117 |
+
gr.update(visible=bool(meta['text_cols'])), # 12: tab_text
|
118 |
+
gr.update(visible=len(meta['numeric_cols']) > 1) # 13: tab_cluster
|
119 |
+
)
|
120 |
yield initial_updates
|
121 |
|
122 |
# 4. Wait for the AI thread to complete
|
123 |
thread.join()
|
124 |
progress(1.0, desc="AI Report complete!")
|
125 |
|
126 |
+
# 5. Yield the final update. We create a mutable list from the initial tuple,
|
127 |
+
# update the AI report element, and convert it back to a tuple to yield.
|
128 |
+
final_updates_list = list(initial_updates)
|
129 |
+
final_updates_list[0] = gr.update(value=ai_report_queue[0])
|
130 |
+
yield tuple(final_updates_list)
|
131 |
|
132 |
|
133 |
# --- Interactive Explorer Callbacks ---
|
|
|
157 |
# --- Specialized Module Callbacks ---
|
158 |
|
159 |
def update_clustering(analyzer, k):
    """
    Callback for the clustering module. Returns a tuple of three values.

    Args:
        analyzer: The DataAnalyzer object held by the UI; any other value means
            the initial analysis has not produced one.
        k: The cluster count forwarded unchanged to perform_clustering.

    Returns:
        (cluster figure, elbow figure, summary) as produced by
        perform_clustering, or -- when no valid analyzer is available -- two
        no-op gr.update() objects followed by an update whose value is
        "Run analysis first.".
    """
    analyzer_ready = isinstance(analyzer, DataAnalyzer)
    if not analyzer_ready:
        # Leave both plots untouched and only show the hint text.
        return gr.update(), gr.update(), gr.update(value="Run analysis first.")

    # Delegate the heavy lifting to the specialized module
    clustering_result = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
    cluster_figure, elbow_figure, summary_text = clustering_result
    return cluster_figure, elbow_figure, summary_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|