Update app.py
Browse files
app.py
CHANGED
@@ -4,11 +4,13 @@
|
|
4 |
#
|
5 |
# DESCRIPTION: An enterprise-grade Gradio application that revolutionizes Exploratory
|
6 |
# Data Analysis (EDA). By integrating Google's Gemini Pro LLM, this
|
7 |
-
# tool transcends traditional data profiling
|
8 |
-
#
|
9 |
-
#
|
10 |
-
#
|
11 |
-
#
|
|
|
|
|
12 |
#
|
13 |
# ARCHITECTURE: The application is built upon a robust, object-oriented foundation.
|
14 |
# - DataAnalyzer (Core Engine): An encapsulated class that holds the
|
@@ -19,27 +21,19 @@
|
|
19 |
# high-quality analytical narratives.
|
20 |
# - Gradio Interface (UI Layer): A multi-tabbed, interactive dashboard
|
21 |
# that logically separates the AI narrative, data profiling, static
|
22 |
-
# visuals, and interactive exploration tools.
|
23 |
-
# efficiently to provide a responsive user experience.
|
24 |
-
#
|
25 |
-
# FEATURES:
|
26 |
-
# - AI-Powered Executive Summary: Generates a high-level overview for stakeholders.
|
27 |
-
# - Automated Data Quality Audit: Provides a quality score and actionable cleaning steps.
|
28 |
-
# - Insight Discovery Engine: Uncovers hidden patterns, correlations, and anomalies.
|
29 |
-
# - Strategic Recommendations: Suggests next steps, modeling approaches, and business use cases.
|
30 |
-
# - Comprehensive Profiling: Detailed statistical tables for all data types.
|
31 |
-
# - Interactive Visualization Suite: Dynamic plots for deep-dive analysis.
|
32 |
-
# - One-Click Report Export: Downloads the complete AI-generated analysis as a Markdown file.
|
33 |
#
|
34 |
# AUTHOR: An MCP Expert in Data & AI Solutions
|
35 |
-
# VERSION: 3.
|
36 |
-
# LAST-UPDATE: 2023-10-
|
37 |
|
38 |
from __future__ import annotations
|
39 |
|
40 |
import warnings
|
41 |
import logging
|
42 |
import os
|
|
|
|
|
43 |
from datetime import datetime
|
44 |
from typing import Any, Dict, List, Optional, Tuple
|
45 |
|
@@ -93,7 +87,6 @@ class DataAnalyzer:
|
|
93 |
numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
|
94 |
categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
|
95 |
|
96 |
-
# Advanced: High correlation pair detection
|
97 |
high_corr_pairs = []
|
98 |
if len(numeric_cols) > 1:
|
99 |
corr_matrix = self.df[numeric_cols].corr().abs()
|
@@ -120,18 +113,15 @@ class DataAnalyzer:
|
|
120 |
def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
121 |
"""Generates structured DataFrames for data profiling."""
|
122 |
logging.info("Generating profiling tables for missing, numeric, and categorical data.")
|
123 |
-
# Missing data profile
|
124 |
missing = self.df.isnull().sum()
|
125 |
missing_df = pd.DataFrame({
|
126 |
'Missing Count': missing,
|
127 |
'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
|
128 |
}).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
|
129 |
|
130 |
-
# Numeric features profile
|
131 |
numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
|
132 |
numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
|
133 |
|
134 |
-
# Categorical features profile
|
135 |
cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
|
136 |
cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
|
137 |
|
@@ -174,7 +164,10 @@ class DataAnalyzer:
|
|
174 |
logging.info("Generating AI narrative with the Gemini API.")
|
175 |
meta = self.metadata
|
176 |
|
177 |
-
#
|
|
|
|
|
|
|
178 |
prompt = f"""
|
179 |
As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
|
180 |
Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
|
@@ -188,7 +181,7 @@ class DataAnalyzer:
|
|
188 |
- **Total Missing Values:** {meta['total_missing']:,}
|
189 |
- **High-Correlation Pairs (>{Config.CORR_THRESHOLD}):** {meta['high_corr_pairs'] if meta['high_corr_pairs'] else 'None detected.'}
|
190 |
- **Data Snippet (First 5 Rows):**
|
191 |
-
{
|
192 |
|
193 |
**REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
|
194 |
|
@@ -240,8 +233,6 @@ class DataAnalyzer:
|
|
240 |
|
241 |
def create_ui():
|
242 |
"""Defines and builds the Gradio user interface."""
|
243 |
-
|
244 |
-
# --- Interactive Plotting Functions (scoped inside UI creation for clarity) ---
|
245 |
def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
|
246 |
if not col or not analyzer: return go.Figure()
|
247 |
return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
|
@@ -279,11 +270,8 @@ def create_ui():
|
|
279 |
|
280 |
return stats_md, fig
|
281 |
|
282 |
-
# --- Main UI Blocks ---
|
283 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
|
284 |
-
# Store for the main DataAnalyzer object
|
285 |
state_analyzer = gr.State()
|
286 |
-
|
287 |
gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
|
288 |
gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
|
289 |
|
@@ -295,37 +283,30 @@ def create_ui():
|
|
295 |
with gr.Column(scale=1, min_width=150):
|
296 |
analyze_button = gr.Button("β¨ Generate Analysis", variant="primary")
|
297 |
|
298 |
-
with gr.Tabs()
|
299 |
-
with gr.Tab("π€ AI Narrative"
|
300 |
ai_report_output = gr.Markdown("Your AI-generated report will appear here once analysis is complete...")
|
301 |
download_report_button = gr.Button("β¬οΈ Download Full Report", visible=False)
|
302 |
-
|
303 |
-
with gr.Tab(" Profile", id=1):
|
304 |
gr.Markdown("### **Detailed Data Profile**")
|
305 |
-
gr.Markdown("#### Missing Data Summary")
|
306 |
profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
|
307 |
-
gr.Markdown("#### Numeric Features Summary")
|
308 |
profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
|
309 |
-
gr.Markdown("#### Categorical Features Summary")
|
310 |
profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
|
311 |
-
|
312 |
-
with gr.Tab("π Overview Visuals", id=2):
|
313 |
gr.Markdown("### **At-a-Glance Visualizations**")
|
314 |
with gr.Row():
|
315 |
plot_types = gr.Plot()
|
316 |
plot_missing = gr.Plot()
|
317 |
plot_correlation = gr.Plot()
|
318 |
-
|
319 |
-
with gr.Tab("π¨ Interactive Explorer", id=3):
|
320 |
gr.Markdown("### **Visually Explore Feature Relationships**")
|
321 |
-
with gr.Row():
|
322 |
with gr.Column(scale=1):
|
323 |
gr.Markdown("#### Univariate Analysis")
|
324 |
dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
|
325 |
with gr.Column(scale=2):
|
326 |
plot_histogram = gr.Plot()
|
327 |
-
|
328 |
-
with gr.Row():
|
329 |
with gr.Column(scale=1):
|
330 |
gr.Markdown("#### Bivariate Analysis (Scatter Plot)")
|
331 |
dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", visible=False)
|
@@ -333,8 +314,7 @@ def create_ui():
|
|
333 |
dd_scatter_color = gr.Dropdown(label="Color By (Optional)", visible=False)
|
334 |
with gr.Column(scale=2):
|
335 |
plot_scatter = gr.Plot()
|
336 |
-
|
337 |
-
with gr.Tab("π Column Deep-Dive", id=4):
|
338 |
gr.Markdown("### **Inspect a Single Column in Detail**")
|
339 |
dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
|
340 |
with gr.Row():
|
@@ -344,54 +324,29 @@ def create_ui():
|
|
344 |
gr.HTML("""
|
345 |
<div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;">
|
346 |
<p>π‘ Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p>
|
347 |
-
<p>CognitiveEDA v3.
|
348 |
</div>
|
349 |
""")
|
350 |
|
351 |
-
# --- Event Listeners & Control Flow ---
|
352 |
-
|
353 |
outputs_for_main_analysis = [
|
354 |
state_analyzer, ai_report_output, download_report_button,
|
355 |
profile_missing_df, profile_numeric_df, profile_categorical_df,
|
356 |
plot_types, plot_missing, plot_correlation,
|
357 |
dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col
|
358 |
]
|
359 |
-
|
360 |
-
analyze_button.click(
|
361 |
-
fn=run_full_analysis,
|
362 |
-
inputs=[upload_button, api_key_input],
|
363 |
-
outputs=outputs_for_main_analysis
|
364 |
-
)
|
365 |
-
|
366 |
-
# Interactive plot triggers
|
367 |
dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
|
368 |
-
|
369 |
scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
|
370 |
-
dd_scatter_x
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
dd_drilldown_col.change(
|
375 |
-
fn=analyze_single_column,
|
376 |
-
inputs=[state_analyzer, dd_drilldown_col],
|
377 |
-
outputs=[md_drilldown_stats, plot_drilldown]
|
378 |
-
)
|
379 |
-
|
380 |
-
download_report_button.click(
|
381 |
-
fn=download_report_file,
|
382 |
-
inputs=[state_analyzer, ai_report_output],
|
383 |
-
outputs=gr.File(label="Download Report")
|
384 |
-
)
|
385 |
-
|
386 |
return demo
|
387 |
|
388 |
# --- Main Application Logic ---
|
389 |
|
390 |
def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any]:
|
391 |
-
"""
|
392 |
-
Orchestrates the entire analysis pipeline upon button click.
|
393 |
-
Returns a dictionary to update all relevant UI components at once.
|
394 |
-
"""
|
395 |
if file_obj is None:
|
396 |
raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
|
397 |
if not api_key:
|
@@ -402,43 +357,30 @@ def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any
|
|
402 |
df = pd.read_csv(file_obj.name)
|
403 |
analyzer = DataAnalyzer(df)
|
404 |
|
405 |
-
# --- Execute all analysis tasks concurrently (conceptually) ---
|
406 |
ai_report = analyzer.generate_ai_narrative(api_key)
|
407 |
missing_df, num_df, cat_df = analyzer.get_profiling_tables()
|
408 |
fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
|
409 |
|
410 |
-
# --- Prepare UI component updates ---
|
411 |
meta = analyzer.metadata
|
412 |
-
all_cols, num_cols
|
413 |
|
414 |
-
# Return a dictionary mapping components to their new state/value
|
415 |
return {
|
416 |
-
|
417 |
-
state_analyzer: analyzer,
|
418 |
-
ai_report_output: ai_report,
|
419 |
download_report_button: gr.Button(visible=True),
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
profile_categorical_df: cat_df,
|
424 |
-
# Overview Visuals Tab
|
425 |
-
plot_types: fig_types,
|
426 |
-
plot_missing: fig_missing,
|
427 |
-
plot_correlation: fig_corr,
|
428 |
-
# Interactive Explorer & Drilldown Dropdown Updates
|
429 |
dd_hist_col: gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
|
430 |
dd_scatter_x: gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
|
431 |
dd_scatter_y: gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
|
432 |
dd_scatter_color: gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
|
433 |
dd_drilldown_col: gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
|
434 |
}
|
435 |
-
|
436 |
except Exception as e:
|
437 |
logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
|
438 |
raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
|
439 |
|
440 |
-
|
441 |
-
def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
|
442 |
"""Generates a comprehensive Markdown file for download."""
|
443 |
if not analyzer:
|
444 |
logging.warning("Download attempted without a valid analyzer object.")
|
@@ -446,8 +388,6 @@ def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
|
|
446 |
|
447 |
filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
|
448 |
meta = analyzer.metadata
|
449 |
-
|
450 |
-
# Assemble the full report
|
451 |
full_report = f"# CognitiveEDA - Data Discovery Report\n"
|
452 |
full_report += f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
453 |
full_report += f"## Dataset Overview\n"
|
@@ -459,11 +399,27 @@ def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> str:
|
|
459 |
|
460 |
with open(filename, "w", encoding="utf-8") as f:
|
461 |
f.write(full_report)
|
462 |
-
|
463 |
logging.info(f"Report file generated successfully: {filename}")
|
464 |
return filename
|
465 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
if __name__ == "__main__":
|
|
|
468 |
app_instance = create_ui()
|
469 |
app_instance.launch(debug=True, server_name="0.0.0.0")
|
|
|
4 |
#
|
5 |
# DESCRIPTION: An enterprise-grade Gradio application that revolutionizes Exploratory
|
6 |
# Data Analysis (EDA). By integrating Google's Gemini Pro LLM, this
|
7 |
+
# tool transcends traditional data profiling to deliver a rich,
|
8 |
+
# narrative-driven analysis, actionable insights, and strategic
|
9 |
+
# recommendations in a single, streamlined workflow.
|
10 |
+
#
|
11 |
+
# SETUP: This application has external dependencies. Before running, install
|
12 |
+
# all required packages using the requirements.txt file:
|
13 |
+
# $ pip install -r requirements.txt
|
14 |
#
|
15 |
# ARCHITECTURE: The application is built upon a robust, object-oriented foundation.
|
16 |
# - DataAnalyzer (Core Engine): An encapsulated class that holds the
|
|
|
21 |
# high-quality analytical narratives.
|
22 |
# - Gradio Interface (UI Layer): A multi-tabbed, interactive dashboard
|
23 |
# that logically separates the AI narrative, data profiling, static
|
24 |
+
# visuals, and interactive exploration tools.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
#
|
26 |
# AUTHOR: An MCP Expert in Data & AI Solutions
|
27 |
+
# VERSION: 3.1 (Enterprise Edition)
|
28 |
+
# LAST-UPDATE: 2023-10-28 (Added dependency check & requirements file)
|
29 |
|
30 |
from __future__ import annotations
|
31 |
|
32 |
import warnings
|
33 |
import logging
|
34 |
import os
|
35 |
+
import sys
|
36 |
+
import importlib.util
|
37 |
from datetime import datetime
|
38 |
from typing import Any, Dict, List, Optional, Tuple
|
39 |
|
|
|
87 |
numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
|
88 |
categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
|
89 |
|
|
|
90 |
high_corr_pairs = []
|
91 |
if len(numeric_cols) > 1:
|
92 |
corr_matrix = self.df[numeric_cols].corr().abs()
|
|
|
113 |
def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
114 |
"""Generates structured DataFrames for data profiling."""
|
115 |
logging.info("Generating profiling tables for missing, numeric, and categorical data.")
|
|
|
116 |
missing = self.df.isnull().sum()
|
117 |
missing_df = pd.DataFrame({
|
118 |
'Missing Count': missing,
|
119 |
'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
|
120 |
}).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
|
121 |
|
|
|
122 |
numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
|
123 |
numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
|
124 |
|
|
|
125 |
cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
|
126 |
cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
|
127 |
|
|
|
164 |
logging.info("Generating AI narrative with the Gemini API.")
|
165 |
meta = self.metadata
|
166 |
|
167 |
+
# NOTE: The .to_markdown() method requires the 'tabulate' library.
|
168 |
+
# This is handled by the pre-flight check in if __name__ == "__main__":
|
169 |
+
data_snippet_md = self.df.head(5).to_markdown(index=False)
|
170 |
+
|
171 |
prompt = f"""
|
172 |
As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
|
173 |
Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
|
|
|
181 |
- **Total Missing Values:** {meta['total_missing']:,}
|
182 |
- **High-Correlation Pairs (>{Config.CORR_THRESHOLD}):** {meta['high_corr_pairs'] if meta['high_corr_pairs'] else 'None detected.'}
|
183 |
- **Data Snippet (First 5 Rows):**
|
184 |
+
{data_snippet_md}
|
185 |
|
186 |
**REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
|
187 |
|
|
|
233 |
|
234 |
def create_ui():
|
235 |
"""Defines and builds the Gradio user interface."""
|
|
|
|
|
236 |
def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
|
237 |
if not col or not analyzer: return go.Figure()
|
238 |
return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
|
|
|
270 |
|
271 |
return stats_md, fig
|
272 |
|
|
|
273 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
|
|
|
274 |
state_analyzer = gr.State()
|
|
|
275 |
gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
|
276 |
gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
|
277 |
|
|
|
283 |
with gr.Column(scale=1, min_width=150):
|
284 |
analyze_button = gr.Button("β¨ Generate Analysis", variant="primary")
|
285 |
|
286 |
+
with gr.Tabs():
|
287 |
+
with gr.Tab("π€ AI Narrative"):
|
288 |
ai_report_output = gr.Markdown("Your AI-generated report will appear here once analysis is complete...")
|
289 |
download_report_button = gr.Button("β¬οΈ Download Full Report", visible=False)
|
290 |
+
with gr.Tab("Profile"):
|
|
|
291 |
gr.Markdown("### **Detailed Data Profile**")
|
|
|
292 |
profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
|
|
|
293 |
profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
|
|
|
294 |
profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
|
295 |
+
with gr.Tab("π Overview Visuals"):
|
|
|
296 |
gr.Markdown("### **At-a-Glance Visualizations**")
|
297 |
with gr.Row():
|
298 |
plot_types = gr.Plot()
|
299 |
plot_missing = gr.Plot()
|
300 |
plot_correlation = gr.Plot()
|
301 |
+
with gr.Tab("π¨ Interactive Explorer"):
|
|
|
302 |
gr.Markdown("### **Visually Explore Feature Relationships**")
|
303 |
+
with gr.Row(equal_height=False):
|
304 |
with gr.Column(scale=1):
|
305 |
gr.Markdown("#### Univariate Analysis")
|
306 |
dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
|
307 |
with gr.Column(scale=2):
|
308 |
plot_histogram = gr.Plot()
|
309 |
+
with gr.Row(equal_height=False):
|
|
|
310 |
with gr.Column(scale=1):
|
311 |
gr.Markdown("#### Bivariate Analysis (Scatter Plot)")
|
312 |
dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", visible=False)
|
|
|
314 |
dd_scatter_color = gr.Dropdown(label="Color By (Optional)", visible=False)
|
315 |
with gr.Column(scale=2):
|
316 |
plot_scatter = gr.Plot()
|
317 |
+
with gr.Tab("π Column Deep-Dive"):
|
|
|
318 |
gr.Markdown("### **Inspect a Single Column in Detail**")
|
319 |
dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
|
320 |
with gr.Row():
|
|
|
324 |
gr.HTML("""
|
325 |
<div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;">
|
326 |
<p>π‘ Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p>
|
327 |
+
<p>CognitiveEDA v3.1 | An MCP Expert System</p>
|
328 |
</div>
|
329 |
""")
|
330 |
|
|
|
|
|
331 |
outputs_for_main_analysis = [
|
332 |
state_analyzer, ai_report_output, download_report_button,
|
333 |
profile_missing_df, profile_numeric_df, profile_categorical_df,
|
334 |
plot_types, plot_missing, plot_correlation,
|
335 |
dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col
|
336 |
]
|
337 |
+
analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=outputs_for_main_analysis)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
|
|
|
339 |
scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
|
340 |
+
for dd in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
|
341 |
+
dd.change(fn=create_scatterplot, inputs=scatter_inputs, outputs=plot_scatter)
|
342 |
+
dd_drilldown_col.change(fn=analyze_single_column, inputs=[state_analyzer, dd_drilldown_col], outputs=[md_drilldown_stats, plot_drilldown])
|
343 |
+
download_report_button.click(fn=download_report_file, inputs=[state_analyzer, ai_report_output], outputs=gr.File(label="Download Report"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
return demo
|
345 |
|
346 |
# --- Main Application Logic ---
|
347 |
|
348 |
def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any]:
|
349 |
+
"""Orchestrates the entire analysis pipeline upon button click."""
|
|
|
|
|
|
|
350 |
if file_obj is None:
|
351 |
raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
|
352 |
if not api_key:
|
|
|
357 |
df = pd.read_csv(file_obj.name)
|
358 |
analyzer = DataAnalyzer(df)
|
359 |
|
|
|
360 |
ai_report = analyzer.generate_ai_narrative(api_key)
|
361 |
missing_df, num_df, cat_df = analyzer.get_profiling_tables()
|
362 |
fig_types, fig_missing, fig_corr = analyzer.get_overview_visuals()
|
363 |
|
|
|
364 |
meta = analyzer.metadata
|
365 |
+
all_cols, num_cols = meta['columns'], meta['numeric_cols']
|
366 |
|
|
|
367 |
return {
|
368 |
+
state_analyzer: analyzer, ai_report_output: ai_report,
|
|
|
|
|
369 |
download_report_button: gr.Button(visible=True),
|
370 |
+
profile_missing_df: missing_df, profile_numeric_df: num_df,
|
371 |
+
profile_categorical_df: cat_df, plot_types: fig_types,
|
372 |
+
plot_missing: fig_missing, plot_correlation: fig_corr,
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
dd_hist_col: gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
|
374 |
dd_scatter_x: gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
|
375 |
dd_scatter_y: gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
|
376 |
dd_scatter_color: gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
|
377 |
dd_drilldown_col: gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
|
378 |
}
|
|
|
379 |
except Exception as e:
|
380 |
logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
|
381 |
raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
|
382 |
|
383 |
+
def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> Optional[str]:
|
|
|
384 |
"""Generates a comprehensive Markdown file for download."""
|
385 |
if not analyzer:
|
386 |
logging.warning("Download attempted without a valid analyzer object.")
|
|
|
388 |
|
389 |
filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
|
390 |
meta = analyzer.metadata
|
|
|
|
|
391 |
full_report = f"# CognitiveEDA - Data Discovery Report\n"
|
392 |
full_report += f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
393 |
full_report += f"## Dataset Overview\n"
|
|
|
399 |
|
400 |
with open(filename, "w", encoding="utf-8") as f:
|
401 |
f.write(full_report)
|
|
|
402 |
logging.info(f"Report file generated successfully: {filename}")
|
403 |
return filename
|
404 |
|
405 |
+
def perform_pre_flight_checks():
    """Verify that every critical dependency can be located before launch.

    Logs the outcome of the check. When one or more packages cannot be
    found, prints installation guidance to stdout and terminates the
    process.

    Raises:
        SystemExit: With exit code 1 if any required package is missing.
    """
    logging.info("Performing pre-flight dependency checks...")
    required_packages = ["pandas", "gradio", "plotly", "google.generativeai", "tabulate"]

    def _is_missing(pkg: str) -> bool:
        # find_spec() imports the parent package of a dotted name and raises
        # ModuleNotFoundError when that parent is absent (e.g. no "google"
        # package installed at all). Report that as a missing dependency
        # rather than letting the check itself crash with a traceback.
        try:
            return importlib.util.find_spec(pkg) is None
        except ImportError:
            return True

    missing_packages = [pkg for pkg in required_packages if _is_missing(pkg)]

    if missing_packages:
        missing_list = ", ".join(missing_packages)
        banner = "=" * 80
        logging.critical(f"Missing critical packages: {missing_list}")
        print("\n" + banner)
        print("ERROR: Your environment is missing critical dependencies.")
        print(f"Missing package(s): {missing_list}")
        print("Please install all required packages using the requirements.txt file:")
        print("pip install -r requirements.txt")
        print(banner + "\n")
        sys.exit(1)

    logging.info("All dependencies are satisfied. Proceeding with launch.")
|
421 |
|
422 |
if __name__ == "__main__":
    # Abort early with installation guidance if a dependency is missing,
    # then build the Gradio interface and start serving it.
    perform_pre_flight_checks()
    app_instance = create_ui()
    app_instance.launch(server_name="0.0.0.0", debug=True)
|