Spaces:

mgbam
/

CognitiveEDA

Sleeping

App Files Files Community

CognitiveEDA / ui /callbacks.py

mgbam

Update ui/callbacks.py

7d40c30 verified 11 days ago

raw

history blame

8.16 kB

	# ui/callbacks.py

	# -*- coding: utf-8 -*-
	#
	# PROJECT: CognitiveEDA v5.2 - The QuantumLeap Intelligence Platform
	#
	# DESCRIPTION: The "Controller" logic of the application. This module contains
	# the Python functions that execute when Gradio events are triggered.
	# It is designed to be completely decoupled from the UI definition
	# and event attachment process.

	import gradio as gr
	import pandas as pd
	import logging
	from threading import Thread

	import plotly.graph_objects as go
	import plotly.express as px

	from core.analyzer import DataAnalyzer
	from core.llm import GeminiNarrativeGenerator
	from core.config import settings
	from core.exceptions import DataProcessingError
	from modules.clustering import perform_clustering
	from modules.text import generate_word_cloud
	from modules.timeseries import analyze_time_series


	# --- Primary Analysis Chain ---

	def run_initial_analysis(file_obj, progress=gr.Progress(track_tqdm=True)):
	    """
	    Phase 1 of the analysis: fast, synchronous tasks.

	    Validates inputs, loads the uploaded file into a DataFrame (down-sampling
	    it to ``settings.MAX_UI_ROWS`` rows when it is larger) and wraps the
	    result in a DataAnalyzer.

	    Args:
	        file_obj: The uploaded file from Gradio. Either an object exposing the
	            on-disk path as ``.name`` or a plain path string.
	        progress: The Gradio progress tracker.

	    Returns:
	        The instantiated DataAnalyzer object, which will update the gr.State.

	    Raises:
	        gr.Error: If no file was uploaded, if GOOGLE_API_KEY is not
	            configured, or if loading / analysing the data fails. This
	            function never returns None; every failure path raises.
	    """
	    # 1. Input Validation
	    if file_obj is None:
	        raise gr.Error("No file uploaded. Please upload a CSV or Excel file.")

	    # 2. Runtime Configuration Validation
	    progress(0, desc="Validating configuration...")
	    if not settings.GOOGLE_API_KEY:
	        logging.error("Analysis attempted without GOOGLE_API_KEY set.")
	        raise gr.Error(
	            "CRITICAL: GOOGLE_API_KEY is not configured. "
	            "Please add it to your .env file or as a platform secret and restart."
	        )

	    try:
	        # 3. Data Loading
	        progress(0.2, desc="Loading and parsing data file...")
	        # Accept both a file wrapper (path in `.name`) and a bare path string.
	        file_path = file_obj if isinstance(file_obj, str) else file_obj.name
	        # Compare the extension case-insensitively so "DATA.CSV" is not
	        # mistakenly handed to the Excel reader.
	        if file_path.lower().endswith('.csv'):
	            df = pd.read_csv(file_path)
	        else:
	            df = pd.read_excel(file_path)

	        # Cap the row count; the fixed seed keeps the sample reproducible.
	        if len(df) > settings.MAX_UI_ROWS:
	            df = df.sample(n=settings.MAX_UI_ROWS, random_state=42)
	            logging.info(f"DataFrame sampled down to {settings.MAX_UI_ROWS} rows.")

	        # 4. Core Analyzer Instantiation
	        progress(0.7, desc="Instantiating analysis engine...")
	        analyzer = DataAnalyzer(df)
	        progress(1.0, desc="Initial analysis complete.")
	        return analyzer

	    except DataProcessingError as e:
	        # Expected, user-facing failure: surface the message unchanged.
	        logging.error(f"User-facing data processing error: {e}", exc_info=True)
	        raise gr.Error(str(e)) from e
	    except Exception as e:
	        # Top-level boundary: log the traceback and convert to a UI error.
	        logging.error(f"A critical unhandled error occurred during initial analysis: {e}", exc_info=True)
	        raise gr.Error(f"Analysis Failed! An unexpected error occurred: {str(e)}") from e


	def generate_reports_and_visuals(analyzer, progress=gr.Progress(track_tqdm=True)):
	    """
	    Phase 2 of the analysis: slower, multi-stage tasks.

	    This is a generator. It starts the AI narrative in a background thread,
	    yields a first set of updates (profiling tables, overview plots, dropdown
	    choices, tab visibility) with a placeholder in the AI report slot, then
	    waits for the thread and yields the same set again with the AI report
	    filled in.

	    Args:
	        analyzer: The DataAnalyzer object from the gr.State.
	        progress: The Gradio progress tracker.

	    Yields:
	        A dictionary of Gradio updates to populate the dashboard, keyed by
	        output name. Nothing is yielded when `analyzer` is not a DataAnalyzer.
	    """
	    # Guard clause: Do nothing if the initial analysis failed.
	    if not isinstance(analyzer, DataAnalyzer):
	        logging.warning("generate_reports_and_visuals called without a valid analyzer. Aborting.")
	        # Inside a generator a returned value is never delivered to the
	        # consumer, so end the iteration without one.
	        return

	    # 1. Start AI narrative generation in a background thread
	    progress(0, desc="Spawning AI report thread...")
	    ai_report_queue = [""]  # Mutable single-slot holder written by the worker thread

	    def generate_ai_report_threaded(analyzer_instance):
	        # An exception escaping a Thread target is not propagated to the
	        # caller of join(); catch it here so the UI shows a failure message
	        # instead of a silently empty report.
	        try:
	            narrative_generator = GeminiNarrativeGenerator(api_key=settings.GOOGLE_API_KEY)
	            ai_report_queue[0] = narrative_generator.generate_narrative(analyzer_instance)
	        except Exception as e:
	            logging.error(f"AI narrative generation failed: {e}", exc_info=True)
	            ai_report_queue[0] = f"AI report generation failed: {e}"

	    thread = Thread(target=generate_ai_report_threaded, args=(analyzer,))
	    thread.start()

	    # 2. Generate standard reports and visuals while the thread is running
	    progress(0.4, desc="Generating data profiles and visuals...")
	    meta = analyzer.metadata
	    numeric_cols = meta['numeric_cols']
	    missing_df, num_df, cat_df = analyzer.get_profiling_reports()
	    fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()

	    # Default dropdown selections: first / second numeric column when present.
	    first_numeric = numeric_cols[0] if numeric_cols else None
	    second_numeric = numeric_cols[1] if len(numeric_cols) > 1 else None

	    # 3. Yield the first set of updates to populate the main dashboard immediately
	    progress(0.8, desc="Building initial dashboard...")
	    initial_updates = {
	        "ai_report_output": gr.update(value="⏳ Generating AI-powered report in the background... The main dashboard is ready now."),
	        "profile_missing_df": gr.update(value=missing_df),
	        "profile_numeric_df": gr.update(value=num_df),
	        "profile_categorical_df": gr.update(value=cat_df),
	        "plot_types": gr.update(value=fig_types),
	        "plot_missing": gr.update(value=fig_missing),
	        "plot_correlation": gr.update(value=fig_corr),
	        "dd_hist_col": gr.update(choices=numeric_cols, value=first_numeric),
	        "dd_scatter_x": gr.update(choices=numeric_cols, value=first_numeric),
	        "dd_scatter_y": gr.update(choices=numeric_cols, value=second_numeric),
	        "dd_scatter_color": gr.update(choices=meta['columns']),
	        "tab_timeseries": gr.update(visible=bool(meta['datetime_cols'])),
	        "tab_text": gr.update(visible=bool(meta['text_cols'])),
	        "tab_cluster": gr.update(visible=len(numeric_cols) > 1),
	    }
	    yield initial_updates

	    # 4. Wait for the AI thread to complete
	    thread.join()
	    progress(1.0, desc="AI Report complete!")

	    # 5. Yield the final update, now including the AI-generated report
	    #    (or the failure message recorded by the worker).
	    final_updates = initial_updates.copy()
	    final_updates["ai_report_output"] = gr.update(value=ai_report_queue[0])
	    yield final_updates


	# --- Interactive Explorer Callbacks ---

	def create_histogram(analyzer, col):
	    """
	    Build the histogram shown for one selected column.

	    Returns a Plotly Express histogram (with a marginal box plot) of `col`
	    from `analyzer.df`. When `analyzer` is not a DataAnalyzer or `col` is
	    empty, an empty figure carrying a prompt title is returned instead.
	    """
	    if isinstance(analyzer, DataAnalyzer) and col:
	        return px.histogram(
	            analyzer.df,
	            x=col,
	            title=f"<b>Distribution of {col}</b>",
	            marginal="box",
	            template="plotly_white",
	        )

	    # Nothing to plot yet: hand back a blank figure with a hint.
	    placeholder = go.Figure()
	    return placeholder.update_layout(title="Select a column to generate a histogram")

	def create_scatterplot(analyzer, x_col, y_col, color_col):
	    """
	    Build the scatter plot for the chosen X / Y columns and optional color column.

	    When `analyzer` is not a DataAnalyzer, or either axis is unset, an empty
	    figure carrying a prompt title is returned. Frames longer than 10000 rows
	    are randomly sampled down to 10000 rows (fixed seed) before plotting.
	    """
	    inputs_ready = isinstance(analyzer, DataAnalyzer) and x_col and y_col
	    if not inputs_ready:
	        placeholder = go.Figure()
	        return placeholder.update_layout(title="Select X and Y axes to generate a scatter plot")

	    # Plot a bounded subset so large datasets stay responsive.
	    plot_frame = analyzer.df
	    if len(plot_frame) > 10000:
	        plot_frame = plot_frame.sample(n=10000, random_state=42)

	    heading = f"<b>Scatter Plot: {x_col} vs. {y_col}</b>"
	    return px.scatter(
	        plot_frame,
	        x=x_col,
	        y=y_col,
	        color=color_col or None,  # an empty selection means "no color grouping"
	        title=heading,
	        template="plotly_white",
	    )


	# --- Specialized Module Callbacks ---

	def update_clustering(analyzer, k):
	    """
	    Callback for the clustering module.

	    Passes the analyzer's DataFrame, its numeric column list and `k` to
	    `perform_clustering` and returns the resulting (cluster figure, elbow
	    figure, summary) triple. Without a valid DataAnalyzer, the two plot
	    outputs get no-op updates and the summary shows "Run analysis first.".
	    """
	    if isinstance(analyzer, DataAnalyzer):
	        # Delegate the heavy lifting to the specialized module
	        frame = analyzer.df
	        numeric_columns = analyzer.metadata['numeric_cols']
	        cluster_plot, elbow_plot, report = perform_clustering(frame, numeric_columns, k)
	        return cluster_plot, elbow_plot, report

	    unchanged_cluster = gr.update()
	    unchanged_elbow = gr.update()
	    return unchanged_cluster, unchanged_elbow, gr.update(value="Run analysis first.")

	# Add other specialized callbacks for text and time-series here if needed.
	# For example, if you add the dropdowns and plots to the layout:
	#
	# def update_timeseries(analyzer, date_col, value_col):
	#     if not isinstance(analyzer, DataAnalyzer):
	#         return gr.update(), gr.update(value="Run analysis first.")
	#     fig, md = analyze_time_series(analyzer.df, date_col, value_col)
	#     return fig, md
	#
	# def update_text(analyzer, text_col):
	#     if not isinstance(analyzer, DataAnalyzer):
	#         return gr.update()
	#     return generate_word_cloud(analyzer.df, text_col)