mgbam committed on
Commit
f9d0aef
·
verified ·
1 Parent(s): 1f1db6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -81
app.py CHANGED
@@ -10,18 +10,16 @@
10
  # SETUP: $ pip install -r requirements.txt
11
  #
12
  # AUTHOR: An MCP Expert in Data & AI Solutions
13
- # VERSION: 4.1 (Integrated Adaptive Engine)
14
- # LAST-UPDATE: 2023-10-29 (Corrected v4.0 by re-integrating all standard EDA tabs)
15
 
16
  from __future__ import annotations
17
 
18
  import warnings
19
  import logging
20
  import os
21
- import sys
22
- import importlib.util
23
  from datetime import datetime
24
- from typing import Any, Dict, List, Optional, Tuple
25
 
26
  import gradio as gr
27
  import numpy as np
@@ -30,7 +28,7 @@ import plotly.express as px
30
  import plotly.graph_objects as go
31
  import google.generativeai as genai
32
 
33
- # --- Local Adaptive Modules (Requires analysis_modules.py and requirements.txt from previous response) ---
34
  from analysis_modules import analyze_time_series, generate_word_cloud, perform_clustering
35
 
36
  # --- Configuration & Setup ---
@@ -42,11 +40,9 @@ class Config:
42
  GEMINI_MODEL = 'gemini-1.5-flash-latest'
43
  MAX_UI_ROWS = 50000
44
 
45
- # --- Core Analysis Engine (Unchanged from previous response) ---
46
  class DataAnalyzer:
47
- # (The DataAnalyzer class is identical to the previous version and is omitted here for brevity)
48
- # It should contain: __init__, metadata property, _extract_metadata,
49
- # get_profiling_tables, get_overview_visuals, generate_ai_narrative
50
  def __init__(self, df: pd.DataFrame):
51
  if not isinstance(df, pd.DataFrame): raise TypeError("Input must be a pandas DataFrame.")
52
  self.df = df
@@ -63,7 +59,7 @@ class DataAnalyzer:
63
  numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
64
  categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
65
  datetime_cols = self.df.select_dtypes(include=['datetime64', 'datetimetz']).columns.tolist()
66
- text_cols = [col for col in categorical_cols if self.df[col].str.len().mean() > 50]
67
 
68
  high_corr_pairs = []
69
  if len(numeric_cols) > 1:
@@ -104,23 +100,72 @@ class DataAnalyzer:
104
  return fig_types, fig_missing, fig_corr
105
 
106
  def generate_ai_narrative(self, api_key: str, context: Dict[str, Any]) -> str:
107
- # Placeholder for brevity
108
- return "AI Narrative generation is ready."
109
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  # --- UI Creation ---
112
  def create_ui():
113
  """Defines the complete, integrated Gradio user interface."""
114
-
115
- # --- Reusable plotting functions for interactive tabs ---
116
  def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
117
  if not col or not analyzer: return go.Figure()
118
  return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
119
-
120
  def create_scatterplot(analyzer: DataAnalyzer, x_col: str, y_col:str, color_col:str) -> go.Figure:
121
  if not all([analyzer, x_col, y_col]): return go.Figure()
122
  return px.scatter(analyzer.df, x=x_col, y=y_col, color=color_col, title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white")
123
-
124
  def analyze_single_column(analyzer: DataAnalyzer, col: str) -> Tuple[str, go.Figure]:
125
  if not col or not analyzer: return "", go.Figure()
126
  series = analyzer.df[col]
@@ -136,17 +181,14 @@ def create_ui():
136
 
137
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), title=Config.APP_TITLE) as demo:
138
  state_analyzer = gr.State()
139
-
140
  gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
141
  gr.Markdown("Upload your data to receive a complete standard analysis, plus specialized dashboards that unlock automatically based on your data's content.")
142
-
143
  with gr.Row():
144
  upload_button = gr.File(label="1. Upload Data File (CSV, Excel)", file_types=[".csv", ".xlsx", ".xls"], scale=3)
145
  api_key_input = gr.Textbox(label="2. Enter Google Gemini API Key", type="password", scale=2)
146
  analyze_button = gr.Button("✨ Build My Dashboard", variant="primary", scale=1)
147
 
148
  with gr.Tabs():
149
- # --- Standard Tabs (Always Visible) ---
150
  with gr.Tab("πŸ€– AI Narrative"):
151
  ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")
152
  with gr.Tab("πŸ“‹ Profile"):
@@ -156,38 +198,29 @@ def create_ui():
156
  plot_correlation = gr.Plot()
157
  with gr.Tab("🎨 Interactive Explorer"):
158
  with gr.Row():
159
- with gr.Column(scale=1):
160
- dd_hist_col = gr.Dropdown(label="Select Column for Histogram", interactive=True)
161
- with gr.Column(scale=2):
162
- plot_histogram = gr.Plot()
163
  with gr.Row():
164
  with gr.Column(scale=1):
165
  dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", interactive=True)
166
  dd_scatter_y = gr.Dropdown(label="Y-Axis (Numeric)", interactive=True)
167
  dd_scatter_color = gr.Dropdown(label="Color By (Optional)", interactive=True)
168
- with gr.Column(scale=2):
169
- plot_scatter = gr.Plot()
170
  with gr.Tab("πŸ” Column Deep-Dive"):
171
  dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", interactive=True)
172
- with gr.Row():
173
- md_drilldown_stats, plot_drilldown = gr.Markdown(), gr.Plot()
174
-
175
- # --- Specialized, Adaptive Tabs ---
176
  with gr.Tab("βŒ› Time-Series Analysis", visible=False) as tab_timeseries:
177
  with gr.Row():
178
  dd_ts_date = gr.Dropdown(label="Select Date/Time Column", interactive=True)
179
  dd_ts_value = gr.Dropdown(label="Select Value Column", interactive=True)
180
  plot_ts_decomp, md_ts_stats = gr.Plot(), gr.Markdown()
181
-
182
  with gr.Tab("πŸ“ Text Analysis", visible=False) as tab_text:
183
  dd_text_col = gr.Dropdown(label="Select Text Column", interactive=True)
184
  html_word_cloud = gr.HTML()
185
-
186
  with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
187
  num_clusters = gr.Slider(minimum=2, maximum=10, value=4, step=1, label="Number of Clusters (K)", interactive=True)
188
  plot_cluster, md_cluster_summary = gr.Plot(), gr.Markdown()
189
 
190
- # --- Event Listeners ---
191
  main_outputs = [
192
  state_analyzer, ai_report_output,
193
  profile_missing_df, profile_numeric_df, profile_categorical_df,
@@ -195,81 +228,53 @@ def create_ui():
195
  dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col,
196
  tab_timeseries, dd_ts_date, dd_ts_value,
197
  tab_text, dd_text_col,
198
- tab_cluster, num_clusters
199
- ]
200
  analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=main_outputs, show_progress="full")
201
-
202
- # Listeners for standard interactive tabs
203
  dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
204
  scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
205
- for dd in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
206
- dd.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
207
  dd_drilldown_col.change(fn=analyze_single_column, inputs=[state_analyzer, dd_drilldown_col], outputs=[md_drilldown_stats, plot_drilldown])
208
-
209
- # Listeners for specialized adaptive tabs
210
  ts_inputs = [state_analyzer, dd_ts_date, dd_ts_value]
211
- for dd in [dd_ts_date, dd_ts_value]:
212
- dd.change(fn=lambda a, d, v: analyze_time_series(a.df, d, v), inputs=ts_inputs, outputs=[plot_ts_decomp, md_ts_stats])
213
  dd_text_col.change(fn=lambda a, t: generate_word_cloud(a.df, t), inputs=[state_analyzer, dd_text_col], outputs=html_word_cloud)
214
  num_clusters.change(fn=lambda a, k: perform_clustering(a.df, a.metadata['numeric_cols'], k), inputs=[state_analyzer, num_clusters], outputs=[plot_cluster, md_cluster_summary])
215
-
216
  return demo
217
 
218
  # --- Main Application Logic & Orchestration ---
219
  def run_full_analysis(file_obj: gr.File, api_key: str) -> list:
220
- """Orchestrates the complete standard and adaptive analysis."""
221
  if file_obj is None: raise gr.Error("CRITICAL: No file uploaded.")
222
  if not api_key: raise gr.Error("CRITICAL: Gemini API key is missing.")
223
-
224
  try:
225
  logging.info(f"Processing uploaded file: {file_obj.name}")
226
  df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
227
-
228
- if len(df) > Config.MAX_UI_ROWS:
229
- df = df.sample(n=Config.MAX_UI_ROWS, random_state=42)
230
-
231
  analyzer = DataAnalyzer(df)
232
  meta = analyzer.metadata
233
-
234
- # --- Run all base analyses ---
235
  ai_context = {'is_timeseries': bool(meta['datetime_cols']), 'has_text': bool(meta['text_cols'])}
236
  ai_report = analyzer.generate_ai_narrative(api_key, context=ai_context)
237
  missing_df, num_df, cat_df = analyzer.get_profiling_tables()
238
  fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
239
 
240
- # --- Configure standard interactive dropdowns ---
241
- update_hist_dd = gr.Dropdown(choices=meta['numeric_cols'], label="Select Column for Histogram", value=meta['numeric_cols'][0] if meta['numeric_cols'] else None)
242
- update_scatter_x = gr.Dropdown(choices=meta['numeric_cols'], label="X-Axis (Numeric)", value=meta['numeric_cols'][0] if meta['numeric_cols'] else None)
243
- update_scatter_y = gr.Dropdown(choices=meta['numeric_cols'], label="Y-Axis (Numeric)", value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None)
244
- update_scatter_color = gr.Dropdown(choices=meta['columns'], label="Color By (Optional)")
245
- update_drill_dd = gr.Dropdown(choices=meta['columns'], label="Select Column to Analyze")
246
-
247
- # --- Configure adaptive module visibility and dropdowns ---
248
- show_ts_tab = gr.Tab(visible=bool(meta['datetime_cols']))
249
- update_ts_date_dd = gr.Dropdown(choices=meta['datetime_cols'])
250
- update_ts_value_dd = gr.Dropdown(choices=meta['numeric_cols'])
251
-
252
- show_text_tab = gr.Tab(visible=bool(meta['text_cols']))
253
- update_text_dd = gr.Dropdown(choices=meta['text_cols'])
254
 
255
- show_cluster_tab = gr.Tab(visible=len(meta['numeric_cols']) > 1)
256
- update_cluster_slider = gr.Slider(visible=len(meta['numeric_cols']) > 1)
257
-
258
- # Return a flat list of all updates in the correct order
259
- return [
260
- analyzer, ai_report,
261
- missing_df, num_df, cat_df,
262
- fig_types, fig_missing, fig_corr,
263
- update_hist_dd, update_scatter_x, update_scatter_y, update_scatter_color, update_drill_dd,
264
- show_ts_tab, update_ts_date_dd, update_ts_value_dd,
265
- show_text_tab, update_text_dd,
266
- show_cluster_tab, update_cluster_slider
267
- ]
268
  except Exception as e:
269
  logging.error(f"A critical error occurred: {e}", exc_info=True)
270
  raise gr.Error(f"Analysis Failed! Error: {str(e)}")
271
 
272
  if __name__ == "__main__":
273
- # You might want to run perform_pre_flight_checks() here
274
  app_instance = create_ui()
275
  app_instance.launch(debug=True, server_name="0.0.0.0")
 
10
  # SETUP: $ pip install -r requirements.txt
11
  #
12
  # AUTHOR: An MCP Expert in Data & AI Solutions
13
+ # VERSION: 4.2 (Bugfix Edition: AI Narrative Engine Restored)
14
+ # LAST-UPDATE: 2023-10-29 (Fixed critical bug where AI was not being called)
15
 
16
  from __future__ import annotations
17
 
18
  import warnings
19
  import logging
20
  import os
 
 
21
  from datetime import datetime
22
+ from typing import Any, Dict, Optional, Tuple
23
 
24
  import gradio as gr
25
  import numpy as np
 
28
  import plotly.graph_objects as go
29
  import google.generativeai as genai
30
 
31
+ # --- Local Adaptive Modules (Requires analysis_modules.py and requirements.txt) ---
32
  from analysis_modules import analyze_time_series, generate_word_cloud, perform_clustering
33
 
34
  # --- Configuration & Setup ---
 
40
  GEMINI_MODEL = 'gemini-1.5-flash-latest'
41
  MAX_UI_ROWS = 50000
42
 
43
+ # --- Core Analysis Engine ---
44
  class DataAnalyzer:
45
+ """The complete DataAnalyzer class, now with a fully functional AI engine."""
 
 
46
  def __init__(self, df: pd.DataFrame):
47
  if not isinstance(df, pd.DataFrame): raise TypeError("Input must be a pandas DataFrame.")
48
  self.df = df
 
59
  numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
60
  categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
61
  datetime_cols = self.df.select_dtypes(include=['datetime64', 'datetimetz']).columns.tolist()
62
+ text_cols = [col for col in categorical_cols if self.df[col].dropna().str.len().mean() > 50]
63
 
64
  high_corr_pairs = []
65
  if len(numeric_cols) > 1:
 
100
  return fig_types, fig_missing, fig_corr
101
 
102
  def generate_ai_narrative(self, api_key: str, context: Dict[str, Any]) -> str:
103
+ """Generates a context-aware AI narrative using the Gemini API."""
104
+ logging.info(f"Generating AI narrative with context: {list(context.keys())}")
105
+ meta = self.metadata
106
+ data_snippet_md = self.df.head(5).to_markdown(index=False)
107
+
108
+ context_prompt = "**PRIMARY ANALYSIS MODES:**\n"
109
+ if context.get('is_timeseries'):
110
+ context_prompt += "- **Time-Series Detected:** Focus on trends, seasonality, and stationarity. Suggest forecasting models.\n"
111
+ if context.get('has_text'):
112
+ context_prompt += "- **Long-Form Text Detected:** Note potential for NLP tasks like sentiment analysis or topic modeling.\n"
113
+ if not context.get('is_timeseries') and not context.get('has_text'):
114
+ context_prompt += "- **General Tabular Data:** Focus on distributions, correlations, and potential for classification/regression modeling.\n"
115
+
116
+ prompt = f"""
117
+ As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report in Markdown format.
118
+
119
+ {context_prompt}
120
+ **DATASET METADATA:**
121
+ - **Shape:** {meta['shape'][0]} rows, {meta['shape'][1]} columns.
122
+ - **Data Quality Score:** {meta['data_quality_score']}%
123
+ - **Total Missing Values:** {meta['total_missing']:,}
124
+ - **Highly Correlated Pairs:** {meta['high_corr_pairs'] if meta['high_corr_pairs'] else 'None detected.'}
125
+ - **Data Snippet (First 5 Rows):**
126
+ {data_snippet_md}
127
+
128
+ **REQUIRED REPORT STRUCTURE:**
129
+
130
+ # πŸš€ AI Data Discovery Report
131
+
132
+ ## πŸ“„ 1. Executive Summary
133
+ * **Primary Objective:** (Deduce the likely purpose of this dataset. What problem could it solve?)
134
+ * **Key Finding:** (State the single most interesting insight you've discovered.)
135
+ * **Overall State:** (Briefly comment on the data's quality and readiness for analysis.)
136
+
137
+ ## 🧐 2. Deep Dive & Quality Assessment
138
+ * **Structural Profile:** (Describe the dataset's composition: numeric, categorical, text, time-series features.)
139
+ * **Data Quality Audit:** (Elaborate on the quality score and missing values. Are they a major concern?)
140
+ * **Redundancy Check:** (Comment on the detected high-correlation pairs and any risks.)
141
+
142
+ ## πŸ’‘ 3. Actionable Recommendations
143
+ * **Data Cleaning:** (Provide a specific recommendation for handling missing data or outliers.)
144
+ * **Feature Engineering:** (Suggest creating a new, valuable feature.)
145
+ * **Next Analytical Steps:** (Propose a specific hypothesis to test or a suitable ML model to build.)
146
+ """
147
+ try:
148
+ genai.configure(api_key=api_key)
149
+ model = genai.GenerativeModel(Config.GEMINI_MODEL)
150
+ response = model.generate_content(prompt)
151
+ if not response.parts:
152
+ blocked_reason = response.prompt_feedback.block_reason.name if response.prompt_feedback else "Unknown"
153
+ logging.warning(f"AI response blocked. Reason: {blocked_reason}")
154
+ return f"❌ **AI Report Generation Blocked by Safety Settings**\n**Reason:** `{blocked_reason}`."
155
+ return response.text
156
+ except Exception as e:
157
+ logging.error(f"Gemini API call failed: {e}", exc_info=True)
158
+ return f"❌ **AI Report Generation Failed**\n**Error:** `{str(e)}`"
159
 
160
  # --- UI Creation ---
161
  def create_ui():
162
  """Defines the complete, integrated Gradio user interface."""
 
 
163
  def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
164
  if not col or not analyzer: return go.Figure()
165
  return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
 
166
  def create_scatterplot(analyzer: DataAnalyzer, x_col: str, y_col:str, color_col:str) -> go.Figure:
167
  if not all([analyzer, x_col, y_col]): return go.Figure()
168
  return px.scatter(analyzer.df, x=x_col, y=y_col, color=color_col, title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white")
 
169
  def analyze_single_column(analyzer: DataAnalyzer, col: str) -> Tuple[str, go.Figure]:
170
  if not col or not analyzer: return "", go.Figure()
171
  series = analyzer.df[col]
 
181
 
182
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), title=Config.APP_TITLE) as demo:
183
  state_analyzer = gr.State()
 
184
  gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
185
  gr.Markdown("Upload your data to receive a complete standard analysis, plus specialized dashboards that unlock automatically based on your data's content.")
 
186
  with gr.Row():
187
  upload_button = gr.File(label="1. Upload Data File (CSV, Excel)", file_types=[".csv", ".xlsx", ".xls"], scale=3)
188
  api_key_input = gr.Textbox(label="2. Enter Google Gemini API Key", type="password", scale=2)
189
  analyze_button = gr.Button("✨ Build My Dashboard", variant="primary", scale=1)
190
 
191
  with gr.Tabs():
 
192
  with gr.Tab("πŸ€– AI Narrative"):
193
  ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")
194
  with gr.Tab("πŸ“‹ Profile"):
 
198
  plot_correlation = gr.Plot()
199
  with gr.Tab("🎨 Interactive Explorer"):
200
  with gr.Row():
201
+ with gr.Column(scale=1): dd_hist_col = gr.Dropdown(label="Select Column for Histogram", interactive=True)
202
+ with gr.Column(scale=2): plot_histogram = gr.Plot()
 
 
203
  with gr.Row():
204
  with gr.Column(scale=1):
205
  dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", interactive=True)
206
  dd_scatter_y = gr.Dropdown(label="Y-Axis (Numeric)", interactive=True)
207
  dd_scatter_color = gr.Dropdown(label="Color By (Optional)", interactive=True)
208
+ with gr.Column(scale=2): plot_scatter = gr.Plot()
 
209
  with gr.Tab("πŸ” Column Deep-Dive"):
210
  dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", interactive=True)
211
+ with gr.Row(): md_drilldown_stats, plot_drilldown = gr.Markdown(), gr.Plot()
 
 
 
212
  with gr.Tab("βŒ› Time-Series Analysis", visible=False) as tab_timeseries:
213
  with gr.Row():
214
  dd_ts_date = gr.Dropdown(label="Select Date/Time Column", interactive=True)
215
  dd_ts_value = gr.Dropdown(label="Select Value Column", interactive=True)
216
  plot_ts_decomp, md_ts_stats = gr.Plot(), gr.Markdown()
 
217
  with gr.Tab("πŸ“ Text Analysis", visible=False) as tab_text:
218
  dd_text_col = gr.Dropdown(label="Select Text Column", interactive=True)
219
  html_word_cloud = gr.HTML()
 
220
  with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
221
  num_clusters = gr.Slider(minimum=2, maximum=10, value=4, step=1, label="Number of Clusters (K)", interactive=True)
222
  plot_cluster, md_cluster_summary = gr.Plot(), gr.Markdown()
223
 
 
224
  main_outputs = [
225
  state_analyzer, ai_report_output,
226
  profile_missing_df, profile_numeric_df, profile_categorical_df,
 
228
  dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col,
229
  tab_timeseries, dd_ts_date, dd_ts_value,
230
  tab_text, dd_text_col,
231
+ tab_cluster, num_clusters]
 
232
  analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=main_outputs, show_progress="full")
 
 
233
  dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
234
  scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
235
+ for dd in [dd_scatter_x, dd_scatter_y, dd_scatter_color]: dd.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
 
236
  dd_drilldown_col.change(fn=analyze_single_column, inputs=[state_analyzer, dd_drilldown_col], outputs=[md_drilldown_stats, plot_drilldown])
 
 
237
  ts_inputs = [state_analyzer, dd_ts_date, dd_ts_value]
238
+ for dd in [dd_ts_date, dd_ts_value]: dd.change(fn=lambda a, d, v: analyze_time_series(a.df, d, v), inputs=ts_inputs, outputs=[plot_ts_decomp, md_ts_stats])
 
239
  dd_text_col.change(fn=lambda a, t: generate_word_cloud(a.df, t), inputs=[state_analyzer, dd_text_col], outputs=html_word_cloud)
240
  num_clusters.change(fn=lambda a, k: perform_clustering(a.df, a.metadata['numeric_cols'], k), inputs=[state_analyzer, num_clusters], outputs=[plot_cluster, md_cluster_summary])
 
241
  return demo
242
 
243
  # --- Main Application Logic & Orchestration ---
244
  def run_full_analysis(file_obj: gr.File, api_key: str) -> list:
 
245
  if file_obj is None: raise gr.Error("CRITICAL: No file uploaded.")
246
  if not api_key: raise gr.Error("CRITICAL: Gemini API key is missing.")
 
247
  try:
248
  logging.info(f"Processing uploaded file: {file_obj.name}")
249
  df = pd.read_csv(file_obj.name) if file_obj.name.endswith('.csv') else pd.read_excel(file_obj.name)
250
+ if len(df) > Config.MAX_UI_ROWS: df = df.sample(n=Config.MAX_UI_ROWS, random_state=42)
 
 
 
251
  analyzer = DataAnalyzer(df)
252
  meta = analyzer.metadata
 
 
253
  ai_context = {'is_timeseries': bool(meta['datetime_cols']), 'has_text': bool(meta['text_cols'])}
254
  ai_report = analyzer.generate_ai_narrative(api_key, context=ai_context)
255
  missing_df, num_df, cat_df = analyzer.get_profiling_tables()
256
  fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
257
 
258
+ update_hist_dd = gr.Dropdown(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None)
259
+ update_scatter_x = gr.Dropdown(choices=meta['numeric_cols'], value=meta['numeric_cols'][0] if meta['numeric_cols'] else None)
260
+ update_scatter_y = gr.Dropdown(choices=meta['numeric_cols'], value=meta['numeric_cols'][1] if len(meta['numeric_cols']) > 1 else None)
261
+ update_scatter_color = gr.Dropdown(choices=meta['columns'])
262
+ update_drill_dd = gr.Dropdown(choices=meta['columns'])
 
 
 
 
 
 
 
 
 
263
 
264
+ show_ts_tab = gr.Tab(visible=bool(meta['datetime_cols']))
265
+ update_ts_date_dd, update_ts_value_dd = gr.Dropdown(choices=meta['datetime_cols']), gr.Dropdown(choices=meta['numeric_cols'])
266
+ show_text_tab, update_text_dd = gr.Tab(visible=bool(meta['text_cols'])), gr.Dropdown(choices=meta['text_cols'])
267
+ show_cluster_tab, update_cluster_slider = gr.Tab(visible=len(meta['numeric_cols']) > 1), gr.Slider(visible=len(meta['numeric_cols']) > 1)
268
+
269
+ return [analyzer, ai_report, missing_df, num_df, cat_df, fig_types, fig_missing, fig_corr,
270
+ update_hist_dd, update_scatter_x, update_scatter_y, update_scatter_color, update_drill_dd,
271
+ show_ts_tab, update_ts_date_dd, update_ts_value_dd,
272
+ show_text_tab, update_text_dd,
273
+ show_cluster_tab, update_cluster_slider]
 
 
 
274
  except Exception as e:
275
  logging.error(f"A critical error occurred: {e}", exc_info=True)
276
  raise gr.Error(f"Analysis Failed! Error: {str(e)}")
277
 
278
  if __name__ == "__main__":
 
279
  app_instance = create_ui()
280
  app_instance.launch(debug=True, server_name="0.0.0.0")