mgbam committed on
Commit a00699a · verified · 1 Parent(s): 16f99f2

Update ui/callbacks.py

Files changed (1)
  1. ui/callbacks.py +47 -70
ui/callbacks.py CHANGED
@@ -2,12 +2,12 @@
 # -*- coding: utf-8 -*-
 #
-# PROJECT: CognitiveEDA v5.2 - The QuantumLeap Intelligence Platform
+# PROJECT: CognitiveEDA v5.5 - The QuantumLeap Intelligence Platform
 #
-# DESCRIPTION: The "Controller" logic of the application. This module contains
-#              the Python functions that execute when Gradio events are triggered.
-#              It is designed to be completely decoupled from the UI definition
-#              and event attachment process.
+# DESCRIPTION: This module contains the core logic for all Gradio event handlers.
+#              It is designed to be completely decoupled from the UI definition.
+#              Functions here return values in a specific order (often as tuples)
+#              that correspond to a list of output components defined in app.py.
 
 import gradio as gr
 import pandas as pd
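The rewritten header pins down a positional contract: each callback returns (or yields) one value per output component, in the same order as the output list registered in app.py. A minimal, runnable sketch of that pattern, using hypothetical component names rather than the real `main_outputs` list (which this commit does not show):

```python
import gradio as gr

def fake_callback(state):
    # One return value per output component, in the same order as `outputs=`.
    return gr.update(value="report text"), gr.update(visible=True)

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    report_md = gr.Markdown()
    with gr.Tab("Clustering") as cluster_tab:
        gr.Markdown("cluster plots would go here")
    btn = gr.Button("Run")
    # Position 0 of the returned tuple -> report_md; position 1 -> cluster_tab.
    btn.click(fake_callback, inputs=analyzer_state, outputs=[report_md, cluster_tab])
```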
@@ -22,76 +22,66 @@ from core.llm import GeminiNarrativeGenerator
 from core.config import settings
 from core.exceptions import DataProcessingError
 from modules.clustering import perform_clustering
-from modules.text import generate_word_cloud
-from modules.timeseries import analyze_time_series
 
 
 # --- Primary Analysis Chain ---
 
 def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
     """
-    Phase 1 of the analysis: Fast, synchronous tasks.
-    Validates inputs, loads data, and creates the core DataAnalyzer object.
+    Phase 1: Fast, synchronous tasks.
+    Validates inputs, loads data, and creates the core DataAnalyzer object. This
+    function updates the gr.State object, which then triggers the next phase.
 
     Args:
         file_obj: The uploaded file object from Gradio.
         progress: The Gradio progress tracker.
 
     Returns:
-        The instantiated DataAnalyzer object, which will update the gr.State.
-        Returns None if any validation or processing fails.
+        The instantiated DataAnalyzer object, or None if processing fails.
     """
-    # 1. Input Validation
     if file_obj is None:
         raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")
 
-    # 2. Runtime Configuration Validation
     progress(0, desc="Validating configuration...")
     if not settings.GOOGLE_API_KEY:
         logging.error("Analysis attempted without GOOGLE_API_KEY set.")
-        raise gr.Error(
-            "CRITICAL: GOOGLE_API_KEY is not configured. "
-            "Please add it to your .env file or as a platform secret and restart."
-        )
+        raise gr.Error("CRITICAL: GOOGLE_API_KEY is not configured. Please add it as a secret.")
 
     try:
-        # 3. Data Loading
         progress(0.2, desc="Loading and parsing data file...")
         df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
         if len(df) > settings.MAX_UI_ROWS:
             df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
             logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")
 
-        # 4. Core Analyzer Instantiation
         progress(0.7, desc="Instantiating analysis engine...")
         analyzer = DataAnalyzer(df)
-        progress(1.0, desc="Initial analysis complete.")
+        progress(1.0, desc="Initial analysis complete. Generating reports...")
         return analyzer
-
-    except DataProcessingError as e:
-        logging.error(f"User-facing data processing error: {e}", exc_info=True)
-        raise gr.Error(str(e))
     except Exception as e:
-        logging.error(f"A critical unhandled error occurred during initial analysis: {e}", exc_info=True)
+        logging.error(f"A critical error occurred during initial analysis: {e}", exc_info=True)
         raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}")
 
 
 def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
     """
-    Phase 2 of the analysis: Slower, multi-stage tasks.
-    This generator function yields UI updates as they become available.
+    Phase 2: Slower, multi-stage report and visual generation.
+    This generator function yields tuples of UI updates. The order of the yielded
+    tuple is CRITICAL and must exactly match the `main_outputs` list in `app.py`.
 
     Args:
        analyzer: The DataAnalyzer object from the gr.State.
        progress: The Gradio progress tracker.
 
     Yields:
-        A dictionary of Gradio updates to populate the dashboard.
+        A tuple of gr.update() objects to populate the dashboard.
     """
-    # Guard clause: Do nothing if the initial analysis failed.
     if not isinstance(analyzer, DataAnalyzer):
-        logging.warning("generate_reports_and_visuals called without a valid analyzer. Aborting.")
-        return {}
+        logging.warning("generate_reports_and_visuals called without a valid analyzer. Clearing UI.")
+        # Return a tuple of Nones matching the output length to clear/reset the UI.
+        # There are 14 components in the `main_outputs` list in app.py.
+        yield (None,) * 14
+        return
 
     # 1. Start AI narrative generation in a background thread
     progress(0, desc="Spawning AI report thread...")
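This hunk changes two contracts at once: Phase 1 now feeds Phase 2 through gr.State, and the guard clause yields a tuple of Nones sized to the output list instead of returning a dict. A runnable sketch of that two-phase chain, with stand-in callbacks and two outputs instead of the real fourteen (the `.then()` wiring is an assumption about how app.py connects them):

```python
import gradio as gr

def phase_one(file_obj):
    # Stand-in for run_initial_analysis: returns the object stored in gr.State.
    return {"rows": 100} if file_obj is not None else None

def phase_two(analyzer):
    # Stand-in for generate_reports_and_visuals: a generator yielding tuples.
    if analyzer is None:
        yield (None,) * 2  # one None per output component, then stop
        return
    yield gr.update(value="Dashboard ready."), gr.update(value="AI report pending...")
    yield gr.update(value="Dashboard ready."), gr.update(value="AI report done.")

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    file_in = gr.File()
    status_md = gr.Markdown()
    report_md = gr.Markdown()
    # Phase 1 populates the state; .then() fires Phase 2 afterwards.
    file_in.upload(phase_one, inputs=file_in, outputs=analyzer_state).then(
        phase_two, inputs=analyzer_state, outputs=[status_md, report_md]
    )
```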
@@ -103,40 +93,41 @@ def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)
     thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
     thread.start()
 
-    # 2. Generate standard reports and visuals (this is fast)
+    # 2. Generate standard reports and visuals
     progress(0.4, desc="Generating data profiles and visuals...")
     meta = analyzer.metadata
     missing_df, num_df, cat_df = analyzer.get_profiling_reports()
     fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
 
-    # 3. Yield the first set of updates to populate the main dashboard immediately
-    progress(0.8, desc="Building initial dashboard...")
-    initial_updates = {
-        "ai_report_output": gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),
-        "profile_missing_df": gr.update(value=missing_df),
-        "profile_numeric_df": gr.update(value=num_df),
-        "profile_categorical_df": gr.update(value=cat_df),
-        "plot_types": gr.update(value=fig_types),
-        "plot_missing": gr.update(value=fig_missing),
-        "plot_correlation": gr.update(value=fig_corr),
-        "dd_hist_col": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
-        "dd_scatter_x": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),
-        "dd_scatter_y": gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),
-        "dd_scatter_color": gr.update(choices=meta['columns']),
-        "tab_timeseries": gr.update(visible=bool(meta['datetime_cols'])),
-        "tab_text": gr.update(visible=bool(meta['text_cols'])),
-        "tab_cluster": gr.update(visible=len(meta['numeric_cols']) > 1),
-    }
+    # 3. Yield the first set of updates to populate the main dashboard immediately.
+    # The order of this tuple MUST match the `main_outputs` list in `app.py`.
+    initial_updates = (
+        gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),  # 0: ai_report_output
+        gr.update(value=missing_df),   # 1: profile_missing_df
+        gr.update(value=num_df),       # 2: profile_numeric_df
+        gr.update(value=cat_df),       # 3: profile_categorical_df
+        gr.update(value=fig_types),    # 4: plot_types
+        gr.update(value=fig_missing),  # 5: plot_missing
+        gr.update(value=fig_corr),     # 6: plot_correlation
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 7: dd_hist_col
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None),  # 8: dd_scatter_x
+        gr.update(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None),  # 9: dd_scatter_y
+        gr.update(choices=meta['columns']),               # 10: dd_scatter_color
+        gr.update(visible=bool(meta['datetime_cols'])),   # 11: tab_timeseries
+        gr.update(visible=bool(meta['text_cols'])),       # 12: tab_text
+        gr.update(visible=len(meta['numeric_cols']) > 1)  # 13: tab_cluster
+    )
     yield initial_updates
 
     # 4. Wait for the AI thread to complete
     thread.join()
     progress(1.0, desc="AI Report complete!")
 
-    # 5. Yield the final update, now including the AI-generated report
-    final_updates = initial_updates.copy()
-    final_updates["ai_report_output"] = ai_report_queue[0]
-    yield final_updates
+    # 5. Yield the final update. We create a mutable list from the initial tuple,
+    # update the AI report element, and convert it back to a tuple to yield.
+    final_updates_list = list(initial_updates)
+    final_updates_list[0] = gr.update(value=ai_report_queue[0])
+    yield tuple(final_updates_list)
 
 
 # --- Interactive Explorer Callbacks ---
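The thread-plus-join pattern around this hunk relies on `generate_ai_report_threaded` writing its result into `ai_report_queue`, a shared one-slot list defined in the lines omitted just above the hunk. A self-contained sketch of that mailbox pattern (the `sleep` stands in for the slow Gemini call):

```python
from threading import Thread
import time

ai_report_queue = [""]  # shared one-slot mailbox, written by the worker thread

def generate_ai_report_threaded(analyzer):
    time.sleep(0.1)  # stands in for the slow LLM call
    ai_report_queue[0] = f"AI report for {analyzer}"

thread = Thread(target=generate_ai_report_threaded, args=("my-analyzer",))
thread.start()
# ...fast profiling work happens here while the report generates...
thread.join()  # block only once the dashboard is already populated
print(ai_report_queue[0])  # -> "AI report for my-analyzer"
```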
@@ -166,24 +157,10 @@ def create_scatterplot(analyzer, x_col, y_col, color_col):
 # --- Specialized Module Callbacks ---
 
 def update_clustering(analyzer, k):
-    """Callback for the clustering module."""
+    """Callback for the clustering module. Returns a tuple of three updates."""
     if not isinstance(analyzer, DataAnalyzer):
         return gr.update(), gr.update(), gr.update(value="Run analysis first.")
 
     # Delegate the heavy lifting to the specialized module
     fig_cluster, fig_elbow, summary = perform_clustering(analyzer.df, analyzer.metadata['numeric_cols'], k)
-    return fig_cluster, fig_elbow, summary
-
-    # Add other specialized callbacks for text and time-series here if needed.
-    # For example, if you add the dropdowns and plots to the layout:
-    #
-    # def update_timeseries(analyzer, date_col, value_col):
-    #     if not isinstance(analyzer, DataAnalyzer):
-    #         return gr.update(), gr.update(value="Run analysis first.")
-    #     fig, md = analyze_time_series(analyzer.df, date_col, value_col)
-    #     return fig, md
-    #
-    # def update_text(analyzer, text_col):
-    #     if not isinstance(analyzer, DataAnalyzer):
-    #         return gr.update()
-    #     return generate_word_cloud(analyzer.df, text_col)
+    return fig_cluster, fig_elbow, summary
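`update_clustering` keeps its three-value return shape, so whatever event app.py binds it to must list exactly three outputs. A hypothetical wiring sketch (component names and the slider event are illustrative, not taken from app.py):

```python
import gradio as gr

def update_clustering_stub(analyzer, k):
    # Mirrors the callback's contract: three values, in output order.
    if analyzer is None:
        return gr.update(), gr.update(), gr.update(value="Run analysis first.")
    return gr.update(), gr.update(), f"Clustering summary for k={k}"

with gr.Blocks() as demo:
    analyzer_state = gr.State()
    k_slider = gr.Slider(2, 10, value=4, step=1, label="Number of clusters (k)")
    plot_cluster = gr.Plot()
    plot_elbow = gr.Plot()
    md_summary = gr.Markdown()
    k_slider.change(
        update_clustering_stub,
        inputs=[analyzer_state, k_slider],
        outputs=[plot_cluster, plot_elbow, md_summary],
    )
```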