mgbam committed on
Commit a00699a · verified · 1 Parent(s): 16f99f2

Update ui/callbacks.py

Files changed (1)
  1. ui/callbacks.py +47 -70
ui/callbacks.py CHANGED
@@ -2,12 +2,12 @@
 # -*- coding: utf-8 -*-
 #
-# PROJECT: CognitiveEDA v5.2 - The QuantumLeap Intelligence Platform
+# PROJECT: CognitiveEDA v5.5 - The QuantumLeap Intelligence Platform
 #
-# DESCRIPTION: The "Controller" logic of the application. This module contains
-#              the Python functions that execute when Gradio events are triggered.
-#              It is designed to be completely decoupled from the UI definition
-#              and event attachment process.
+# DESCRIPTION: This module contains the core logic for all Gradio event handlers.
+#              It is designed to be completely decoupled from the UI definition.
+#              Functions here return values in a specific order (often as tuples)
+#              that correspond to a list of output components defined in app.py.
 
 import gradio as gr
 import pandas as pd
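The rewritten header pins down a positional contract: each callback returns (or yields) one value per output component, in the same order as the output list registered in app.py. A minimal, runnable sketch of that pattern, using hypothetical component names rather than the real `main_outputs` list (which this commit does not show):

```python
import gradio as gr

def fake_callback(state):
    # One return value per output component, in the same order as `outputs=`.
    return gr.update(value="report text"), gr.update(visible=True)

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    report_md = gr.Markdown()
    with gr.Tab("Clustering") as cluster_tab:
        gr.Markdown("cluster plots would go here")
    btn = gr.Button("Run")
    # Position 0 of the returned tuple -> report_md; position 1 -> cluster_tab.
    btn.click(fake_callback, inputs=analyzer_state, outputs=[report_md, cluster_tab])
```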
@@ -22,76 +22,66 @@ from core.llm import GeminiNarrativeGenerator
 from core.config import settings
 from core.exceptions import DataProcessingError
 from modules.clustering import perform_clustering
-from modules.text import generate_word_cloud
-from modules.timeseries import analyze_time_series
 
 
 # --- Primary Analysis Chain ---
 
 def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
     """
-    Phase 1 of the analysis: Fast, synchronous tasks.
-    Validates inputs, loads data, and creates the core DataAnalyzer object.
+    Phase 1: Fast, synchronous tasks.
+    Validates inputs, loads data, and creates the core DataAnalyzer object. This
+    function updates the gr.State object, which then triggers the next phase.
 
     Args:
         file_obj: The uploaded file object from Gradio.
         progress: The Gradio progress tracker.
 
     Returns:
-        The instantiated DataAnalyzer object, which will update the gr.State.
-        Returns None if any validation or processing fails.
+        The instantiated DataAnalyzer object, or None if processing fails.
     """
-    # 1. Input Validation
     if file_obj is None:
         raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")
 
-    # 2. Runtime Configuration Validation
     progress(0, desc="Validating configuration...")
     if not settings.GOOGLE_API_KEY:
         logging.error("Analysis attempted without GOOGLE_API_KEY set.")
-        raise gr.Error(
-            "CRITICAL: GOOGLE_API_KEY is not configured. "
-            "Please add it to your .env file or as a platform secret and restart."
-        )
+        raise gr.Error("CRITICAL: GOOGLE_API_KEY is not configured. Please add it as a secret.")
 
     try:
-        # 3. Data Loading
         progress(0.2, desc="Loading and parsing data file...")
         df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
         if len(df) > settings.MAX_UI_ROWS:
             df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
             logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")
 
-        # 4. Core Analyzer Instantiation
         progress(0.7, desc="Instantiating analysis engine...")
         analyzer = DataAnalyzer(df)
-        progress(1.0, desc="Initial analysis complete.")
+        progress(1.0, desc="Initial analysis complete. Generating reports...")
         return analyzer
-
-    except DataProcessingError as e:
-        logging.error(f"User-facing data processing error: {e}", exc_info=True)
-        raise gr.Error(str(e))
     except Exception as e:
-        logging.error(f"A critical unhandled error occurred during initial analysis: {e}", exc_info=True)
+        logging.error(f"A critical error occurred during initial analysis: {e}", exc_info=True)
         raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")
 
 
 def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
     """
-    Phase 2 of the analysis: Slower, multi-stage tasks.
-    This generator function yields UI updates as they become available.
+    Phase 2: Slower, multi-stage report and visual generation.
+    This generator function yields tuples of UI updates. The order of the yielded
+    tuple is CRITICAL and must exactly match the `main_outputs` list in `app.py`.
 
     Args:
        analyzer: The DataAnalyzer object from the gr.State.
        progress: The Gradio progress tracker.
 
     Yields:
-        A dictionary of Gradio updates to populate the dashboard.
+        A tuple of gr.update() objects to populate the dashboard.
     """
-    # Guard clause: Do nothing if the initial analysis failed.
     if not isinstance(analyzer, DataAnalyzer):
-        logging.warning("generate_reports_and_visuals called without a valid analyzer. Aborting.")
-        return {}
+        logging.warning("generate_reports_and_visuals called without a valid analyzer. Clearing UI.")
+        # Return a tuple of Nones matching the output length to clear/reset the UI.
+        # There are 14 components in the `main_outputs` list in app.py.
+        yield (None,) * 14
+        return
 
     # 1. Start AI narrative generation in a background thread
     progress(0, desc="Spawning AI report thread...")
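This hunk changes two contracts at once: Phase 1 now feeds Phase 2 through gr.State, and the guard clause yields a tuple of Nones sized to the output list instead of returning a dict. A runnable sketch of that two-phase chain, with stand-in callbacks and two outputs instead of the real fourteen (the `.then()` wiring is an assumption about how app.py connects them):

```python
import gradio as gr

def phase_one(file_obj):
    # Stand-in for run_initial_analysis: returns the object stored in gr.State.
    return {"rows": 100} if file_obj is not None else None

def phase_two(analyzer):
    # Stand-in for generate_reports_and_visuals: a generator yielding tuples.
    if analyzer is None:
        yield (None,) * 2  # one None per output component, then stop
        return
    yield gr.update(value="Dashboard ready."), gr.update(value="AI report pending...")
    yield gr.update(value="Dashboard ready."), gr.update(value="AI report done.")

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    file_in = gr.File()
    status_md = gr.Markdown()
    report_md = gr.Markdown()
    # Phase 1 populates the state; .then() fires Phase 2 afterwards.
    file_in.upload(phase_one, inputs=file_in, outputs=analyzer_state).then(
        phase_two, inputs=analyzer_state, outputs=[status_md, report_md]
    )
```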
@@ -103,40 +93,41 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
     thread.start()
 
-    # 2. Generate standard reports and visuals (this is fast)
+    # 2. Generate standard reports and visuals
     progress(0.4, desc="Generating data profiles and visuals...")
     meta = analyzer.metadata
     missing_df, num_df, cat_df = analyzer.get_profiling_reports()
     fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
 
-    # 3. Yield the first set of updates to populate the main dashboard immediately
-    progress(0.8, desc="Building initial dashboard...")
-    initial_updates = {
-        "ai_report_output": gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),
-        "profile_missing_df": gr.update(value=missing_df),
-        "profile_numeric_df": gr.update(value=num_df),
-        "profile_categorical_df": gr.update(value=cat_df),
-        "plot_types": gr.update(value=fig_types),
-        "plot_missing": gr.update(value=fig_missing),
-        "plot_correlation": gr.update(value=fig_corr),
-        "dd_hist_col": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
-        "dd_scatter_x": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
-        "dd_scatter_y": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),
-        "dd_scatter_color": gr.update(choices=meta['columns']),
-        "tab_timeseries": gr.update(visible=bool(meta['datetime_cols'])),
-        "tab_text": gr.update(visible=bool(meta['text_cols'])),
-        "tab_cluster": gr.update(visible=len(meta['numeric_cols']) > 1),
-    }
+    # 3. Yield the first set of updates to populate the main dashboard immediately.
+    # The order of this tuple MUST match the `main_outputs` list in `app.py`.
+    initial_updates = (
+        gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),  # 0: ai_report_output
+        gr.update(value=missing_df),   # 1: profile_missing_df
+        gr.update(value=num_df),       # 2: profile_numeric_df
+        gr.update(value=cat_df),       # 3: profile_categorical_df
+        gr.update(value=fig_types),    # 4: plot_types
+        gr.update(value=fig_missing),  # 5: plot_missing
+        gr.update(value=fig_corr),     # 6: plot_correlation
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 7: dd_hist_col
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 8: dd_scatter_x
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),  # 9: dd_scatter_y
+        gr.update(choices=meta['columns']),               # 10: dd_scatter_color
+        gr.update(visible=bool(meta['datetime_cols'])),   # 11: tab_timeseries
+        gr.update(visible=bool(meta['text_cols'])),       # 12: tab_text
+        gr.update(visible=len(meta['numeric_cols']) > 1)  # 13: tab_cluster
+    )
     yield initial_updates
 
     # 4. Wait for the AI thread to complete
     thread.join()
     progress(1.0, desc="AI Report complete!")
 
-    # 5. Yield the final update, now including the AI-generated report
-    final_updates = initial_updates.copy()
-    final_updates["ai_report_output"] = ai_report_queue[0]
-    yield final_updates
+    # 5. Yield the final update. We create a mutable list from the initial tuple,
+    # update the AI report element, and convert it back to a tuple to yield.
+    final_updates_list = list(initial_updates)
+    final_updates_list[0] = gr.update(value=ai_report_queue[0])
+    yield tuple(final_updates_list)
 
 
 # --- Interactive Explorer Callbacks ---
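The thread-plus-join pattern around this hunk relies on `generate_ai_report_threaded` writing its result into `ai_report_queue`, a shared one-slot list defined in the lines omitted just above the hunk. A self-contained sketch of that mailbox pattern (the `sleep` stands in for the slow Gemini call):

```python
from threading import Thread
import time

ai_report_queue = [""]  # shared one-slot mailbox, written by the worker thread

def generate_ai_report_threaded(analyzer):
    time.sleep(0.1)  # stands in for the slow LLM call
    ai_report_queue[0] = f"AI report for {analyzer}"

thread = Thread(target=generate_ai_report_threaded, args=("my-analyzer",))
thread.start()
# ...fast profiling work happens here while the report generates...
thread.join()  # block only once the dashboard is already populated
print(ai_report_queue[0])  # -> "AI report for my-analyzer"
```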
@@ -166,24 +157,10 @@ def create_scatterplot(analyzer, x_col, y_col, color_col):
 # --- Specialized Module Callbacks ---
 
 def update_clustering(analyzer, k):
-    """Callback for the clustering module."""
+    """Callback for the clustering module. Returns a tuple of three updates."""
     if not isinstance(analyzer, DataAnalyzer):
         return gr.update(), gr.update(), gr.update(value="Run analysis first.")
 
     # Delegate the heavy lifting to the specialized module
     fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
-    return fig_cluster, fig_elbow, summary
-
-    # Add other specialized callbacks for text and time-series here if needed.
-    # For example, if you add the dropdowns and plots to the layout:
-    #
-    # def update_timeseries(analyzer, date_col, value_col):
-    #     if not isinstance(analyzer, DataAnalyzer):
-    #         return gr.update(), gr.update(value="Run analysis first.")
-    #     fig, md = analyze_time_series(analyzer.df, date_col, value_col)
-    #     return fig, md
-    #
-    # def update_text(analyzer, text_col):
-    #     if not isinstance(analyzer, DataAnalyzer):
-    #         return gr.update()
-    #     return generate_word_cloud(analyzer.df, text_col)
+    return fig_cluster, fig_elbow, summary
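`update_clustering` keeps its three-value return shape, so whatever event app.py binds it to must list exactly three outputs. A hypothetical wiring sketch (component names and the slider event are illustrative, not taken from app.py):

```python
import gradio as gr

def update_clustering_stub(analyzer, k):
    # Mirrors the callback's contract: three values, in output order.
    if analyzer is None:
        return gr.update(), gr.update(), gr.update(value="Run analysis first.")
    return gr.update(), gr.update(), f"Clustering summary for k={k}"

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    k_slider = gr.Slider(2, 10, value=4, step=1, label="Number of clusters (k)")
    plot_cluster = gr.Plot()
    plot_elbow = gr.Plot()
    md_summary = gr.Markdown()
    k_slider.change(
        update_clustering_stub,
        inputs=[analyzer_state, k_slider],
        outputs=[plot_cluster, plot_elbow, md_summary],
    )
```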