mgbam committed on
Commit
1b21942
·
verified ·
1 Parent(s): 3b1eb8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -99
app.py CHANGED
@@ -4,11 +4,13 @@
4
  #
5
  # DESCRIPTION: An enterprise-grade Gradio application that revolutionizes Exploratory
6
  # Data Analysis (EDA). By integrating Google's Gemini Pro LLM, this
7
- # tool transcends traditional data profiling. It automates the generation
8
- # of statistical summaries, interactive visualizations, and, most
9
- # importantly, a rich, narrative-driven analysis. It delivers
10
- # executive summaries, data quality assessments, actionable insights,
11
- # and strategic recommendations in a single, streamlined workflow.
 
 
12
  #
13
  # ARCHITECTURE: The application is built upon a robust, object-oriented foundation.
14
  # - DataAnalyzer (Core Engine): An encapsulated class that holds the
@@ -19,27 +21,19 @@
19
  # high-quality analytical narratives.
20
  # - Gradio Interface (UI Layer): A multi-tabbed, interactive dashboard
21
  # that logically separates the AI narrative, data profiling, static
22
- # visuals, and interactive exploration tools. State is managed
23
- # efficiently to provide a responsive user experience.
24
- #
25
- # FEATURES:
26
- # - AI-Powered Executive Summary: Generates a high-level overview for stakeholders.
27
- # - Automated Data Quality Audit: Provides a quality score and actionable cleaning steps.
28
- # - Insight Discovery Engine: Uncovers hidden patterns, correlations, and anomalies.
29
- # - Strategic Recommendations: Suggests next steps, modeling approaches, and business use cases.
30
- # - Comprehensive Profiling: Detailed statistical tables for all data types.
31
- # - Interactive Visualization Suite: Dynamic plots for deep-dive analysis.
32
- # - One-Click Report Export: Downloads the complete AI-generated analysis as a Markdown file.
33
  #
34
  # AUTHOR: An MCP Expert in Data & AI Solutions
35
- # VERSION: 3.0 (Enterprise Edition)
36
- # LAST-UPDATE: 2023-10-27
37
 
38
  from __future__ import annotations
39
 
40
  import warnings
41
  import logging
42
  import os
 
 
43
  from datetime import datetime
44
  from typing import Any, Dict, List, Optional, Tuple
45
 
@@ -93,7 +87,6 @@ class DataAnalyzer:
93
  numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
94
  categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
95
 
96
- # Advanced: High correlation pair detection
97
  high_corr_pairs = []
98
  if len(numeric_cols) > 1:
99
  corr_matrix = self.df[numeric_cols].corr().abs()
@@ -120,18 +113,15 @@ class DataAnalyzer:
120
  def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
121
  """Generates structured DataFrames for data profiling."""
122
  logging.info("Generating profiling tables for missing, numeric, and categorical data.")
123
- # Missing data profile
124
  missing = self.df.isnull().sum()
125
  missing_df = pd.DataFrame({
126
  'Missing Count': missing,
127
  'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
128
  }).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
129
 
130
- # Numeric features profile
131
  numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
132
  numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
133
 
134
- # Categorical features profile
135
  cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
136
  cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
137
 
@@ -174,7 +164,10 @@ class DataAnalyzer:
174
  logging.info("Generating AI narrative with the Gemini API.")
175
  meta = self.metadata
176
 
177
- # A more sophisticated, structured prompt for a better report
 
 
 
178
  prompt = f"""
179
  As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
180
  Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
@@ -188,7 +181,7 @@ class DataAnalyzer:
188
  - **Total Missing Values:** {meta['total_missing']:,}
189
  - **High-Correlation Pairs (>{Config.CORR_THRESHOLD}):** {meta['high_corr_pairs'] if meta['high_corr_pairs'] else 'None detected.'}
190
  - **Data Snippet (First 5 Rows):**
191
- {self.df.head(5).to_markdown(index=False)}
192
 
193
  **REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
194
 
@@ -240,8 +233,6 @@ class DataAnalyzer:
240
 
241
  def create_ui():
242
  """Defines and builds the Gradio user interface."""
243
-
244
- # --- Interactive Plotting Functions (scoped inside UI creation for clarity) ---
245
  def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
246
  if not col or not analyzer: return go.Figure()
247
  return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
@@ -279,11 +270,8 @@ def create_ui():
279
 
280
  return stats_md, fig
281
 
282
- # --- Main UI Blocks ---
283
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
284
- # Store for the main DataAnalyzer object
285
  state_analyzer = gr.State()
286
-
287
  gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
288
  gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
289
 
@@ -295,37 +283,30 @@ def create_ui():
295
  with gr.Column(scale=1, min_width=150):
296
  analyze_button = gr.Button("✨ Generate Analysis", variant="primary")
297
 
298
- with gr.Tabs() as tabs:
299
- with gr.Tab("🤖 AI Narrative", id=0):
300
  ai_report_output = gr.Markdown("Your AI-generated report will appear here once analysis is complete...")
301
  download_report_button = gr.Button("⬇️ Download Full Report", visible=False)
302
-
303
- with gr.Tab(" Profile", id=1):
304
  gr.Markdown("### **Detailed Data Profile**")
305
- gr.Markdown("#### Missing Data Summary")
306
  profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
307
- gr.Markdown("#### Numeric Features Summary")
308
  profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
309
- gr.Markdown("#### Categorical Features Summary")
310
  profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
311
-
312
- with gr.Tab("📈 Overview Visuals", id=2):
313
  gr.Markdown("### **At-a-Glance Visualizations**")
314
  with gr.Row():
315
  plot_types = gr.Plot()
316
  plot_missing = gr.Plot()
317
  plot_correlation = gr.Plot()
318
-
319
- with gr.Tab("🎨 Interactive Explorer", id=3):
320
  gr.Markdown("### **Visually Explore Feature Relationships**")
321
- with gr.Row():
322
  with gr.Column(scale=1):
323
  gr.Markdown("#### Univariate Analysis")
324
  dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
325
  with gr.Column(scale=2):
326
  plot_histogram = gr.Plot()
327
-
328
- with gr.Row():
329
  with gr.Column(scale=1):
330
  gr.Markdown("#### Bivariate Analysis (Scatter Plot)")
331
  dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", visible=False)
@@ -333,8 +314,7 @@ def create_ui():
333
  dd_scatter_color = gr.Dropdown(label="Color By (Optional)", visible=False)
334
  with gr.Column(scale=2):
335
  plot_scatter = gr.Plot()
336
-
337
- with gr.Tab("🔍 Column Deep-Dive", id=4):
338
  gr.Markdown("### **Inspect a Single Column in Detail**")
339
  dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
340
  with gr.Row():
@@ -344,54 +324,29 @@ def create_ui():
344
  gr.HTML("""
345
  <div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;">
346
  <p>💡 Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p>
347
- <p>CognitiveEDA v3.0 | An MCP Expert System</p>
348
  </div>
349
  """)
350
 
351
- # --- Event Listeners & Control Flow ---
352
-
353
  outputs_for_main_analysis = [
354
  state_analyzer, ai_report_output, download_report_button,
355
  profile_missing_df, profile_numeric_df, profile_categorical_df,
356
  plot_types, plot_missing, plot_correlation,
357
  dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col
358
  ]
359
-
360
- analyze_button.click(
361
- fn=run_full_analysis,
362
- inputs=[upload_button, api_key_input],
363
- outputs=outputs_for_main_analysis
364
- )
365
-
366
- # Interactive plot triggers
367
  dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
368
-
369
  scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
370
- dd_scatter_x.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
371
- dd_scatter_y.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
372
- dd_scatter_color.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
373
-
374
- dd_drilldown_col.change(
375
- fn=analyze_single_column,
376
- inputs=[state_analyzer, dd_drilldown_col],
377
- outputs=[md_drilldown_stats, plot_drilldown]
378
- )
379
-
380
- download_report_button.click(
381
- fn=download_report_file,
382
- inputs=[state_analyzer, ai_report_output],
383
- outputs=gr.File(label="Download Report")
384
- )
385
-
386
  return demo
387
 
388
  # --- Main Application Logic ---
389
 
390
  def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any]:
391
- """
392
- Orchestrates the entire analysis pipeline upon button click.
393
- Returns a dictionary to update all relevant UI components at once.
394
- """
395
  if file_obj is None:
396
  raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
397
  if not api_key:
@@ -402,43 +357,30 @@ def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any
402
  df = pd.read_csv(file_obj.name)
403
  analyzer = DataAnalyzer(df)
404
 
405
- # --- Execute all analysis tasks concurrently (conceptually) ---
406
  ai_report = analyzer.generate_ai_narrative(api_key)
407
  missing_df, num_df, cat_df = analyzer.get_profiling_tables()
408
  fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
409
 
410
- # --- Prepare UI component updates ---
411
  meta = analyzer.metadata
412
- all_cols, num_cols, cat_cols = meta['columns'], meta['numeric_cols'], meta['categorical_cols']
413
 
414
- # Return a dictionary mapping components to their new state/value
415
  return {
416
- # State & AI Report
417
- state_analyzer: analyzer,
418
- ai_report_output: ai_report,
419
  download_report_button: gr.Button(visible=True),
420
- # Profiling Tab
421
- profile_missing_df: missing_df,
422
- profile_numeric_df: num_df,
423
- profile_categorical_df: cat_df,
424
- # Overview Visuals Tab
425
- plot_types: fig_types,
426
- plot_missing: fig_missing,
427
- plot_correlation: fig_corr,
428
- # Interactive Explorer & Drilldown Dropdown Updates
429
  dd_hist_col: gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
430
  dd_scatter_x: gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
431
  dd_scatter_y: gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
432
  dd_scatter_color: gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
433
  dd_drilldown_col: gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
434
  }
435
-
436
  except Exception as e:
437
  logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
438
  raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
439
 
440
-
441
- def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
442
  """Generates a comprehensive Markdown file for download."""
443
  if not analyzer:
444
  logging.warning("Download attempted without a valid analyzer object.")
@@ -446,8 +388,6 @@ def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
446
 
447
  filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
448
  meta = analyzer.metadata
449
-
450
- # Assemble the full report
451
  full_report = f"# CognitiveEDA - Data Discovery Report\n"
452
  full_report += f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
453
  full_report += f"## Dataset Overview\n"
@@ -459,11 +399,27 @@ def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
459
 
460
  with open(filename, "w", encoding="utf-8") as f:
461
  f.write(full_report)
462
-
463
  logging.info(f"Report file generated successfully: {filename}")
464
  return filename
465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
  if __name__ == "__main__":
 
468
  app_instance = create_ui()
469
  app_instance.launch(debug=True, server_name="0.0.0.0")
 
4
  #
5
  # DESCRIPTION: An enterprise-grade Gradio application that revolutionizes Exploratory
6
  # Data Analysis (EDA). By integrating Google's Gemini Pro LLM, this
7
+ # tool transcends traditional data profiling to deliver a rich,
8
+ # narrative-driven analysis, actionable insights, and strategic
9
+ # recommendations in a single, streamlined workflow.
10
+ #
11
+ # SETUP: This application has external dependencies. Before running, install
12
+ # all required packages using the requirements.txt file:
13
+ # $ pip install -r requirements.txt
14
  #
15
  # ARCHITECTURE: The application is built upon a robust, object-oriented foundation.
16
  # - DataAnalyzer (Core Engine): An encapsulated class that holds the
 
21
  # high-quality analytical narratives.
22
  # - Gradio Interface (UI Layer): A multi-tabbed, interactive dashboard
23
  # that logically separates the AI narrative, data profiling, static
24
+ # visuals, and interactive exploration tools.
 
 
 
 
 
 
 
 
 
 
25
  #
26
  # AUTHOR: An MCP Expert in Data & AI Solutions
27
+ # VERSION: 3.1 (Enterprise Edition)
28
+ # LAST-UPDATE: 2023-10-28 (Added dependency check & requirements file)
29
 
30
  from __future__ import annotations
31
 
32
  import warnings
33
  import logging
34
  import os
35
+ import sys
36
+ import importlib.util
37
  from datetime import datetime
38
  from typing import Any, Dict, List, Optional, Tuple
39
 
 
87
  numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
88
  categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
89
 
 
90
  high_corr_pairs = []
91
  if len(numeric_cols) > 1:
92
  corr_matrix = self.df[numeric_cols].corr().abs()
 
113
  def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
114
  """Generates structured DataFrames for data profiling."""
115
  logging.info("Generating profiling tables for missing, numeric, and categorical data.")
 
116
  missing = self.df.isnull().sum()
117
  missing_df = pd.DataFrame({
118
  'Missing Count': missing,
119
  'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
120
  }).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
121
 
 
122
  numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
123
  numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
124
 
 
125
  cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
126
  cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
127
 
 
164
  logging.info("Generating AI narrative with the Gemini API.")
165
  meta = self.metadata
166
 
167
+ # NOTE: The .to_markdown() method requires the 'tabulate' library.
168
+ # This is handled by the pre-flight check in if __name__ == "__main__":
169
+ data_snippet_md = self.df.head(5).to_markdown(index=False)
170
+
171
  prompt = f"""
172
  As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
173
  Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
 
181
  - **Total Missing Values:** {meta['total_missing']:,}
182
  - **High-Correlation Pairs (>{Config.CORR_THRESHOLD}):** {meta['high_corr_pairs'] if meta['high_corr_pairs'] else 'None detected.'}
183
  - **Data Snippet (First 5 Rows):**
184
+ {data_snippet_md}
185
 
186
  **REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
187
 
 
233
 
234
  def create_ui():
235
  """Defines and builds the Gradio user interface."""
 
 
236
  def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
237
  if not col or not analyzer: return go.Figure()
238
  return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
 
270
 
271
  return stats_md, fig
272
 
 
273
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
 
274
  state_analyzer = gr.State()
 
275
  gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
276
  gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
277
 
 
283
  with gr.Column(scale=1, min_width=150):
284
  analyze_button = gr.Button("✨ Generate Analysis", variant="primary")
285
 
286
+ with gr.Tabs():
287
+ with gr.Tab("🤖 AI Narrative"):
288
  ai_report_output = gr.Markdown("Your AI-generated report will appear here once analysis is complete...")
289
  download_report_button = gr.Button("⬇️ Download Full Report", visible=False)
290
+ with gr.Tab("Profile"):
 
291
  gr.Markdown("### **Detailed Data Profile**")
 
292
  profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
 
293
  profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
 
294
  profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
295
+ with gr.Tab("📈 Overview Visuals"):
 
296
  gr.Markdown("### **At-a-Glance Visualizations**")
297
  with gr.Row():
298
  plot_types = gr.Plot()
299
  plot_missing = gr.Plot()
300
  plot_correlation = gr.Plot()
301
+ with gr.Tab("🎨 Interactive Explorer"):
 
302
  gr.Markdown("### **Visually Explore Feature Relationships**")
303
+ with gr.Row(equal_height=False):
304
  with gr.Column(scale=1):
305
  gr.Markdown("#### Univariate Analysis")
306
  dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
307
  with gr.Column(scale=2):
308
  plot_histogram = gr.Plot()
309
+ with gr.Row(equal_height=False):
 
310
  with gr.Column(scale=1):
311
  gr.Markdown("#### Bivariate Analysis (Scatter Plot)")
312
  dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", visible=False)
 
314
  dd_scatter_color = gr.Dropdown(label="Color By (Optional)", visible=False)
315
  with gr.Column(scale=2):
316
  plot_scatter = gr.Plot()
317
+ with gr.Tab("🔍 Column Deep-Dive"):
 
318
  gr.Markdown("### **Inspect a Single Column in Detail**")
319
  dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
320
  with gr.Row():
 
324
  gr.HTML("""
325
  <div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;">
326
  <p>💡 Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p>
327
+ <p>CognitiveEDA v3.1 | An MCP Expert System</p>
328
  </div>
329
  """)
330
 
 
 
331
  outputs_for_main_analysis = [
332
  state_analyzer, ai_report_output, download_report_button,
333
  profile_missing_df, profile_numeric_df, profile_categorical_df,
334
  plot_types, plot_missing, plot_correlation,
335
  dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col
336
  ]
337
+ analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=outputs_for_main_analysis)
 
 
 
 
 
 
 
338
  dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
 
339
  scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
340
+ for dd in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
341
+ dd.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
342
+ dd_drilldown_col.change(fn=analyze_single_column, inputs=[state_analyzer, dd_drilldown_col], outputs=[md_drilldown_stats, plot_drilldown])
343
+ download_report_button.click(fn=download_report_file, inputs=[state_analyzer, ai_report_output], outputs=gr.File(label="Download Report"))
 
 
 
 
 
 
 
 
 
 
 
 
344
  return demo
345
 
346
  # --- Main Application Logic ---
347
 
348
  def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any]:
349
+ """Orchestrates the entire analysis pipeline upon button click."""
 
 
 
350
  if file_obj is None:
351
  raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
352
  if not api_key:
 
357
  df = pd.read_csv(file_obj.name)
358
  analyzer = DataAnalyzer(df)
359
 
 
360
  ai_report = analyzer.generate_ai_narrative(api_key)
361
  missing_df, num_df, cat_df = analyzer.get_profiling_tables()
362
  fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
363
 
 
364
  meta = analyzer.metadata
365
+ all_cols, num_cols = meta['columns'], meta['numeric_cols']
366
 
 
367
  return {
368
+ state_analyzer: analyzer, ai_report_output: ai_report,
 
 
369
  download_report_button: gr.Button(visible=True),
370
+ profile_missing_df: missing_df, profile_numeric_df: num_df,
371
+ profile_categorical_df: cat_df, plot_types: fig_types,
372
+ plot_missing: fig_missing, plot_correlation: fig_corr,
 
 
 
 
 
 
373
  dd_hist_col: gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
374
  dd_scatter_x: gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
375
  dd_scatter_y: gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
376
  dd_scatter_color: gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
377
  dd_drilldown_col: gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
378
  }
 
379
  except Exception as e:
380
  logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
381
  raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
382
 
383
+ def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> Optional[str]:
 
384
  """Generates a comprehensive Markdown file for download."""
385
  if not analyzer:
386
  logging.warning("Download attempted without a valid analyzer object.")
 
388
 
389
  filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
390
  meta = analyzer.metadata
 
 
391
  full_report = f"# CognitiveEDA - Data Discovery Report\n"
392
  full_report += f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
393
  full_report += f"## Dataset Overview\n"
 
399
 
400
  with open(filename, "w", encoding="utf-8") as f:
401
  f.write(full_report)
 
402
  logging.info(f"Report file generated successfully: {filename}")
403
  return filename
404
 
405
+ def perform_pre_flight_checks():
406
+ """Checks for critical dependencies before launching the app."""
407
+ logging.info("Performing pre-flight dependency checks...")
408
+ required_packages = ["pandas", "gradio", "plotly", "google.generativeai", "tabulate"]
409
+ missing_packages = [pkg for pkg in required_packages if importlib.util.find_spec(pkg) is None]
410
+
411
+ if missing_packages:
412
+ logging.critical(f"Missing critical packages: {', '.join(missing_packages)}")
413
+ print("\n" + "="*80)
414
+ print("ERROR: Your environment is missing critical dependencies.")
415
+ print(f"Missing package(s): {', '.join(missing_packages)}")
416
+ print("Please install all required packages using the requirements.txt file:")
417
+ print("pip install -r requirements.txt")
418
+ print("="*80 + "\n")
419
+ sys.exit(1)
420
+ logging.info("All dependencies are satisfied. Proceeding with launch.")
421
 
422
  if __name__ == "__main__":
423
+ perform_pre_flight_checks()
424
  app_instance = create_ui()
425
  app_instance.launch(debug=True, server_name="0.0.0.0")