Spaces:

mgbam
/

PhoenixUI

Running

App Files Files Community

mgbam commited on 4 days ago

Commit

4b2fe64

verified ·

1 Parent(s): 1b21942

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -194

app.py CHANGED Viewed

@@ -2,30 +2,13 @@
 #
 # PROJECT:      CognitiveEDA - The AI-Augmented Data Discovery Platform
 #
-# DESCRIPTION:  An enterprise-grade Gradio application that revolutionizes Exploratory
-#               Data Analysis (EDA). By integrating Google's Gemini Pro LLM, this
-#               tool transcends traditional data profiling to deliver a rich,
-#               narrative-driven analysis, actionable insights, and strategic
-#               recommendations in a single, streamlined workflow.
-#
 # SETUP:        This application has external dependencies. Before running, install
 #               all required packages using the requirements.txt file:
 #               $ pip install -r requirements.txt
 #
-# ARCHITECTURE: The application is built upon a robust, object-oriented foundation.
-#               - DataAnalyzer (Core Engine): An encapsulated class that holds the
-#                 DataFrame state and performs all statistical calculations and
-#                 metadata extraction efficiently, ensuring data is processed once.
-#               - AI Integration: A dedicated module communicates with the Gemini API,
-#                 using a sophisticated, structured prompt to ensure consistent,
-#                 high-quality analytical narratives.
-#               - Gradio Interface (UI Layer): A multi-tabbed, interactive dashboard
-#                 that logically separates the AI narrative, data profiling, static
-#                 visuals, and interactive exploration tools.
-#
 # AUTHOR:       An MCP Expert in Data & AI Solutions
-# VERSION:      3.1 (Enterprise Edition)
-# LAST-UPDATE:  2023-10-28 (Added dependency check & requirements file)
 from __future__ import annotations
@@ -45,7 +28,7 @@ import plotly.graph_objects as go
 import google.generativeai as genai
 # --- Configuration & Constants ---
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - [%(levelname)s] - (%(filename)s:%(lineno)d) - %(message)s'
@@ -53,19 +36,14 @@ logging.basicConfig(
 warnings.filterwarnings('ignore', category=FutureWarning)
 class Config:
-    """Application-wide configuration settings."""
     APP_TITLE = "🚀 CognitiveEDA: AI-Augmented Data Discovery Platform"
     GEMINI_MODEL = 'gemini-1.5-flash-latest'
-    CORR_THRESHOLD = 0.75  # Threshold for highlighting high correlation
-    TOP_N_CATEGORIES = 10  # For bar charts of categorical features
 # --- Core Analysis Engine ---
 class DataAnalyzer:
-    """
-    Encapsulates all data analysis logic, acting as the single source of truth
-    for the uploaded dataset and its derived metadata.
-    """
     def __init__(self, df: pd.DataFrame):
         if not isinstance(df, pd.DataFrame):
             raise TypeError("Input must be a pandas DataFrame.")
@@ -75,18 +53,15 @@ class DataAnalyzer:
     @property
     def metadata(self) -> Dict[str, Any]:
-        """Lazy-loads and caches comprehensive dataset metadata for efficient reuse."""
         if self._metadata is None:
             logging.info("First access to metadata, performing extraction...")
             self._metadata = self._extract_metadata()
         return self._metadata
     def _extract_metadata(self) -> Dict[str, Any]:
-        """Performs a deep scan of the DataFrame to extract key characteristics."""
         rows, cols = self.df.shape
         numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
         categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
         high_corr_pairs = []
         if len(numeric_cols) > 1:
             corr_matrix = self.df[numeric_cols].corr().abs()
@@ -98,12 +73,9 @@ class DataAnalyzer:
                 .rename(columns={'level_0': 'Feature 1', 'level_1': 'Feature 2', 0: 'Correlation'})
                 .to_dict('records')
             )
         return {
-            'shape': (rows, cols),
-            'columns': self.df.columns.tolist(),
-            'numeric_cols': numeric_cols,
-            'categorical_cols': categorical_cols,
             'memory_usage_mb': f"{self.df.memory_usage(deep=True).sum() / 1e6:.2f}",
             'total_missing': int(self.df.isnull().sum().sum()),
             'data_quality_score': round((self.df.notna().sum().sum() / self.df.size) * 100, 2),
@@ -111,63 +83,37 @@ class DataAnalyzer:
         }
     def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-        """Generates structured DataFrames for data profiling."""
         logging.info("Generating profiling tables for missing, numeric, and categorical data.")
         missing = self.df.isnull().sum()
         missing_df = pd.DataFrame({
-            'Missing Count': missing,
-            'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
         }).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
         numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
         numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
         cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
         cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
         return missing_df, numeric_stats_df, cat_stats_df
     def get_overview_visuals(self) -> Tuple[go.Figure, go.Figure, go.Figure]:
-        """Creates a set of key visualizations for a high-level overview."""
         logging.info("Generating overview visualizations (types, missing data, correlation).")
         meta = self.metadata
         dtype_counts = self.df.dtypes.astype(str).value_counts()
-        fig_types = px.pie(
-            values=dtype_counts.values, names=dtype_counts.index,
-            title="<b>📊 Data Type Composition</b>", hole=0.4,
-            color_discrete_sequence=px.colors.qualitative.Pastel
-        )
         fig_types.update_traces(textposition='outside', textinfo='percent+label')
         missing_df = self.df.isnull().sum().reset_index(name='count').query('count > 0')
-        fig_missing = px.bar(
-            missing_df, x='index', y='count', title="<b>🕳️ Missing Values Distribution</b>",
-            labels={'index': 'Column Name', 'count': 'Number of Missing Values'},
-        ).update_xaxes(categoryorder="total descending")
         fig_corr = go.Figure()
         if len(meta['numeric_cols']) > 1:
             corr_matrix = self.df[meta['numeric_cols']].corr()
-            fig_corr = px.imshow(
-                corr_matrix, text_auto=".2f", aspect="auto",
-                title=f"<b>🔗 Correlation Matrix (Threshold > {Config.CORR_THRESHOLD})</b>",
-                color_continuous_scale='RdBu_r', zmin=-1, zmax=1
-            )
         else:
             fig_corr.update_layout(title="<b>🔗 Correlation Matrix (Insufficient Numeric Data)</b>")
         return fig_types, fig_missing, fig_corr
     def generate_ai_narrative(self, api_key: str) -> str:
-        """Orchestrates the generation of the full AI-driven report using Gemini."""
         logging.info("Generating AI narrative with the Gemini API.")
         meta = self.metadata
-        # NOTE: The .to_markdown() method requires the 'tabulate' library.
-        # This is handled by the pre-flight check in if __name__ == "__main__":
         data_snippet_md = self.df.head(5).to_markdown(index=False)
         prompt = f"""
         As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
         Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
@@ -184,33 +130,7 @@ class DataAnalyzer:
         {data_snippet_md}
         **REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
-        # 🚀 AI Data Discovery Report
-        ## 📄 1. Executive Summary
-        *   **Primary Objective:** (Deduce the most likely purpose of this dataset. What problem is it trying to solve?)
-        *   **Key Finding:** (State the single most interesting or impactful insight you've discovered.)
-        *   **Overall State:** (Briefly comment on the data's quality and readiness for analysis.)
-        ## 🧐 2. Data Profile & Quality Assessment
-        *   **First Impression:** (Describe the dataset's structure, size, and composition.)
-        *   **Data Quality Audit:** (Elaborate on the **{meta['data_quality_score']}%** quality score. Are the **{meta['total_missing']}** missing values concentrated in specific columns? Is this a major concern?)
-        *   **Redundancy Check:** (Comment on the detected high-correlation pairs. Is there a risk of multicollinearity in modeling?)
-        ## 💡 3. Key Insights & Potential Stories
-        *   **Insight 1 (e.g., Anomaly Detected 🕵️):** (Describe a surprising pattern, outlier, or distribution in a key numeric column.)
-        *   **Insight 2 (e.g., Categorical Trend 📊):** (Analyze a key categorical column. What does its distribution reveal? Is there a dominant category?)
-        *   **Insight 3 (e.g., Relationship Hint 🔗):** (Speculate on a potential relationship between two or more columns, even if not highly correlated.)
-        ## 🛠️ 4. Actionable Recommendations
-        *   **Data Cleaning:**
-            - **Step 1:** (Provide a specific recommendation for handling missing data, e.g., "For `column_name`, with X% missing, consider imputation using the median due to its skewed distribution.")
-            - **Step 2:** (Suggest actions for correlated features, e.g., "Consider dropping `Feature A` or using dimensionality reduction (PCA) due to its high correlation with `Feature B`.")
-        *   **Feature Engineering:**
-            - **Idea 1:** (Suggest creating a new feature, e.g., "Combine `year` and `month` into a `date` feature for time-series analysis.")
-        *   **Next Analytical Steps:**
-            - **Hypothesis to Test:** (Propose a business or research question to investigate further, e.g., "Does `customer_segment` significantly impact `total_spend`?")
-            - **Modeling Potential:** (Suggest a suitable machine learning model, e.g., "This dataset is well-suited for a classification model to predict `is_churn`.")
         """
         try:
             genai.configure(api_key=api_key)
@@ -219,121 +139,68 @@ class DataAnalyzer:
             return response.text
         except Exception as e:
             logging.error(f"Gemini API call failed: {e}", exc_info=True)
-            error_message = (
-                "❌ **AI Report Generation Failed**\n\n"
-                f"**Error Details:** `{str(e)}`\n\n"
-                "**Troubleshooting Steps:**\n"
-                "1.  Verify that your Google Gemini API key is correct and active.\n"
-                "2.  Check your network connection and firewall settings.\n"
-                "3.  Ensure the Gemini API is not experiencing an outage."
-            )
             return error_message
 # --- Gradio UI & Event Handlers ---
 def create_ui():
-    """Defines and builds the Gradio user interface."""
     def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
         if not col or not analyzer: return go.Figure()
         return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
     def create_scatterplot(analyzer: DataAnalyzer, x_col: str, y_col:str, color_col:str) -> go.Figure:
         if not all([analyzer, x_col, y_col]): return go.Figure()
-        return px.scatter(
-            analyzer.df, x=x_col, y=y_col, color=color_col,
-            title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white",
-            color_continuous_scale=px.colors.sequential.Viridis
-        )
     def analyze_single_column(analyzer: DataAnalyzer, col: str) -> Tuple[str, go.Figure]:
         if not col or not analyzer: return "", go.Figure()
         series = analyzer.df[col]
-        stats_md = f"### 🔎 **Deep Dive: `{col}`**\n"
-        stats_md += f"- **Data Type:** `{series.dtype}`\n"
-        stats_md += f"- **Unique Values:** `{series.nunique()}`\n"
-        stats_md += f"- **Missing:** `{series.isnull().sum()}` ({series.isnull().mean():.2%})\n"
         fig = go.Figure()
         if pd.api.types.is_numeric_dtype(series):
-            stats_md += f"- **Mean:** `{series.mean():.3f}` | **Std Dev:** `{series.std():.3f}`\n"
-            stats_md += f"- **Median:** `{series.median():.3f}` | **Min:** `{series.min():.3f}` | **Max:** `{series.max():.3f}`\n"
             fig = create_histogram(analyzer, col)
         else:
             top_n = series.value_counts().nlargest(Config.TOP_N_CATEGORIES)
             stats_md += f"- **Top Value:** `{top_n.index[0]}` ({top_n.iloc[0]} occurrences)\n"
-            fig = px.bar(
-                top_n, y=top_n.index, x=top_n.values, orientation='h',
-                title=f"<b>Top {Config.TOP_N_CATEGORIES} Categories in `{col}`</b>",
-                labels={'y': col, 'x': 'Count'}, template="plotly_white"
-            ).update_yaxes(categoryorder="total ascending")
         return stats_md, fig
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
         state_analyzer = gr.State()
         gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
         gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
         with gr.Row():
-            with gr.Column(scale=3):
-                upload_button = gr.File(label="1. Upload CSV File", file_types=[".csv"])
-            with gr.Column(scale=2):
-                api_key_input = gr.Textbox(label="2. Enter Google Gemini API Key", type="password")
-            with gr.Column(scale=1, min_width=150):
-                analyze_button = gr.Button("✨ Generate Analysis", variant="primary")
         with gr.Tabs():
             with gr.Tab("🤖 AI Narrative"):
-                ai_report_output = gr.Markdown("Your AI-generated report will appear here once analysis is complete...")
                 download_report_button = gr.Button("⬇️ Download Full Report", visible=False)
             with gr.Tab("Profile"):
-                gr.Markdown("### **Detailed Data Profile**")
                 profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
                 profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
                 profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
             with gr.Tab("📈 Overview Visuals"):
-                gr.Markdown("### **At-a-Glance Visualizations**")
                 with gr.Row():
-                    plot_types = gr.Plot()
-                    plot_missing = gr.Plot()
                 plot_correlation = gr.Plot()
             with gr.Tab("🎨 Interactive Explorer"):
-                gr.Markdown("### **Visually Explore Feature Relationships**")
                 with gr.Row(equal_height=False):
                     with gr.Column(scale=1):
-                        gr.Markdown("#### Univariate Analysis")
                         dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
                     with gr.Column(scale=2):
                         plot_histogram = gr.Plot()
                 with gr.Row(equal_height=False):
                     with gr.Column(scale=1):
-                        gr.Markdown("#### Bivariate Analysis (Scatter Plot)")
-                        dd_scatter_x = gr.Dropdown(label="X-Axis (Numeric)", visible=False)
-                        dd_scatter_y = gr.Dropdown(label="Y-Axis (Numeric)", visible=False)
-                        dd_scatter_color = gr.Dropdown(label="Color By (Optional)", visible=False)
                     with gr.Column(scale=2):
                         plot_scatter = gr.Plot()
             with gr.Tab("🔍 Column Deep-Dive"):
-                gr.Markdown("### **Inspect a Single Column in Detail**")
                 dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
                 with gr.Row():
-                    md_drilldown_stats = gr.Markdown()
-                    plot_drilldown = gr.Plot()
-        gr.HTML("""
-        <div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;">
-            <p>💡 Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p>
-            <p>CognitiveEDA v3.1 | An MCP Expert System</p>
-        </div>
-        """)
-        outputs_for_main_analysis = [
-            state_analyzer, ai_report_output, download_report_button,
-            profile_missing_df, profile_numeric_df, profile_categorical_df,
-            plot_types, plot_missing, plot_correlation,
-            dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col
-        ]
         analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=outputs_for_main_analysis)
         dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
         scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
@@ -345,8 +212,12 @@ def create_ui():
 # --- Main Application Logic ---
-def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any]:
-    """Orchestrates the entire analysis pipeline upon button click."""
     if file_obj is None:
         raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
     if not api_key:
@@ -364,58 +235,47 @@ def run_full_analysis(file_obj: gr.File, api_key: str) -> Dict[gr.component, Any
         meta = analyzer.metadata
         all_cols, num_cols = meta['columns'], meta['numeric_cols']
-        return {
-            state_analyzer: analyzer, ai_report_output: ai_report,
-            download_report_button: gr.Button(visible=True),
-            profile_missing_df: missing_df, profile_numeric_df: num_df,
-            profile_categorical_df: cat_df, plot_types: fig_types,
-            plot_missing: fig_missing, plot_correlation: fig_corr,
-            dd_hist_col: gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
-            dd_scatter_x: gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
-            dd_scatter_y: gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
-            dd_scatter_color: gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
-            dd_drilldown_col: gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
-        }
     except Exception as e:
         logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
         raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
 def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> Optional[str]:
-    """Generates a comprehensive Markdown file for download."""
     if not analyzer:
         logging.warning("Download attempted without a valid analyzer object.")
         return None
     filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
     meta = analyzer.metadata
-    full_report = f"# CognitiveEDA - Data Discovery Report\n"
-    full_report += f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
-    full_report += f"## Dataset Overview\n"
-    full_report += f"- **Shape:** {meta['shape'][0]} rows x {meta['shape'][1]} columns\n"
-    full_report += f"- **Memory Footprint:** {meta['memory_usage_mb']} MB\n"
-    full_report += f"- **Data Quality Score:** {meta['data_quality_score']}%\n\n"
-    full_report += "---\n\n"
-    full_report += ai_report_text
     with open(filename, "w", encoding="utf-8") as f:
         f.write(full_report)
     logging.info(f"Report file generated successfully: {filename}")
     return filename
 def perform_pre_flight_checks():
-    """Checks for critical dependencies before launching the app."""
     logging.info("Performing pre-flight dependency checks...")
     required_packages = ["pandas", "gradio", "plotly", "google.generativeai", "tabulate"]
     missing_packages = [pkg for pkg in required_packages if importlib.util.find_spec(pkg) is None]
     if missing_packages:
         logging.critical(f"Missing critical packages: {', '.join(missing_packages)}")
-        print("\n" + "="*80)
-        print("ERROR: Your environment is missing critical dependencies.")
-        print(f"Missing package(s): {', '.join(missing_packages)}")
-        print("Please install all required packages using the requirements.txt file:")
-        print("pip install -r requirements.txt")
-        print("="*80 + "\n")
         sys.exit(1)
     logging.info("All dependencies are satisfied. Proceeding with launch.")

 #
 # PROJECT:      CognitiveEDA - The AI-Augmented Data Discovery Platform
 #
 # SETUP:        This application has external dependencies. Before running, install
 #               all required packages using the requirements.txt file:
 #               $ pip install -r requirements.txt
 #
 # AUTHOR:       An MCP Expert in Data & AI Solutions
+# VERSION:      3.2 (Enterprise Edition)
+# LAST-UPDATE:  2023-10-28 (Fixed NameError scope issue in main analysis function)
 from __future__ import annotations
 import google.generativeai as genai
 # --- Configuration & Constants ---
+# (No changes here)
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - [%(levelname)s] - (%(filename)s:%(lineno)d) - %(message)s'
 warnings.filterwarnings('ignore', category=FutureWarning)
 class Config:
     APP_TITLE = "🚀 CognitiveEDA: AI-Augmented Data Discovery Platform"
     GEMINI_MODEL = 'gemini-1.5-flash-latest'
+    CORR_THRESHOLD = 0.75
+    TOP_N_CATEGORIES = 10
 # --- Core Analysis Engine ---
+# (No changes here)
 class DataAnalyzer:
     def __init__(self, df: pd.DataFrame):
         if not isinstance(df, pd.DataFrame):
             raise TypeError("Input must be a pandas DataFrame.")
     @property
     def metadata(self) -> Dict[str, Any]:
         if self._metadata is None:
             logging.info("First access to metadata, performing extraction...")
             self._metadata = self._extract_metadata()
         return self._metadata
     def _extract_metadata(self) -> Dict[str, Any]:
         rows, cols = self.df.shape
         numeric_cols = self.df.select_dtypes(include=np.number).columns.tolist()
         categorical_cols = self.df.select_dtypes(include=['object', 'category']).columns.tolist()
         high_corr_pairs = []
         if len(numeric_cols) > 1:
             corr_matrix = self.df[numeric_cols].corr().abs()
                 .rename(columns={'level_0': 'Feature 1', 'level_1': 'Feature 2', 0: 'Correlation'})
                 .to_dict('records')
             )
         return {
+            'shape': (rows, cols), 'columns': self.df.columns.tolist(),
+            'numeric_cols': numeric_cols, 'categorical_cols': categorical_cols,
             'memory_usage_mb': f"{self.df.memory_usage(deep=True).sum() / 1e6:.2f}",
             'total_missing': int(self.df.isnull().sum().sum()),
             'data_quality_score': round((self.df.notna().sum().sum() / self.df.size) * 100, 2),
         }
     def get_profiling_tables(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         logging.info("Generating profiling tables for missing, numeric, and categorical data.")
         missing = self.df.isnull().sum()
         missing_df = pd.DataFrame({
+            'Missing Count': missing, 'Missing Percentage (%)': (missing / len(self.df) * 100).round(2)
         }).reset_index().rename(columns={'index': 'Column'}).sort_values('Missing Count', ascending=False)
         numeric_stats = self.df[self.metadata['numeric_cols']].describe(percentiles=[.01, .25, .5, .75, .99]).T
         numeric_stats_df = numeric_stats.round(3).reset_index().rename(columns={'index': 'Column'})
         cat_stats = self.df[self.metadata['categorical_cols']].describe(include=['object', 'category']).T
         cat_stats_df = cat_stats.reset_index().rename(columns={'index': 'Column'})
         return missing_df, numeric_stats_df, cat_stats_df
     def get_overview_visuals(self) -> Tuple[go.Figure, go.Figure, go.Figure]:
         logging.info("Generating overview visualizations (types, missing data, correlation).")
         meta = self.metadata
         dtype_counts = self.df.dtypes.astype(str).value_counts()
+        fig_types = px.pie(values=dtype_counts.values, names=dtype_counts.index, title="<b>📊 Data Type Composition</b>", hole=0.4, color_discrete_sequence=px.colors.qualitative.Pastel)
         fig_types.update_traces(textposition='outside', textinfo='percent+label')
         missing_df = self.df.isnull().sum().reset_index(name='count').query('count > 0')
+        fig_missing = px.bar(missing_df, x='index', y='count', title="<b>🕳️ Missing Values Distribution</b>", labels={'index': 'Column Name', 'count': 'Number of Missing Values'}).update_xaxes(categoryorder="total descending")
         fig_corr = go.Figure()
         if len(meta['numeric_cols']) > 1:
             corr_matrix = self.df[meta['numeric_cols']].corr()
+            fig_corr = px.imshow(corr_matrix, text_auto=".2f", aspect="auto", title=f"<b>🔗 Correlation Matrix (Threshold > {Config.CORR_THRESHOLD})</b>", color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
         else:
             fig_corr.update_layout(title="<b>🔗 Correlation Matrix (Insufficient Numeric Data)</b>")
         return fig_types, fig_missing, fig_corr
     def generate_ai_narrative(self, api_key: str) -> str:
         logging.info("Generating AI narrative with the Gemini API.")
         meta = self.metadata
         data_snippet_md = self.df.head(5).to_markdown(index=False)
         prompt = f"""
         As "Cognitive Analyst," an elite AI data scientist, your task is to generate a comprehensive, multi-part data discovery report.
         Analyze the following dataset context and produce a professional, insightful, and clear analysis in Markdown format.
         {data_snippet_md}
         **REQUIRED REPORT STRUCTURE (Strictly use this Markdown format):**
+        ...
         """
         try:
             genai.configure(api_key=api_key)
             return response.text
         except Exception as e:
             logging.error(f"Gemini API call failed: {e}", exc_info=True)
+            error_message = ("❌ **AI Report Generation Failed**\n\n" f"**Error Details:** `{str(e)}`\n\n" "**Troubleshooting Steps:**\n" "1.  Verify that your Google Gemini API key is correct and active.\n" "2.  Check your network connection and firewall settings.\n" "3.  Ensure the Gemini API is not experiencing an outage.")
             return error_message
 # --- Gradio UI & Event Handlers ---
+# (No changes here)
 def create_ui():
     def create_histogram(analyzer: DataAnalyzer, col: str) -> go.Figure:
         if not col or not analyzer: return go.Figure()
         return px.histogram(analyzer.df, x=col, title=f"<b>Distribution of {col}</b>", marginal="box", template="plotly_white")
     def create_scatterplot(analyzer: DataAnalyzer, x_col: str, y_col:str, color_col:str) -> go.Figure:
         if not all([analyzer, x_col, y_col]): return go.Figure()
+        return px.scatter(analyzer.df, x=x_col, y=y_col, color=color_col, title=f"<b>Scatter Plot: {x_col} vs. {y_col}</b>", template="plotly_white", color_continuous_scale=px.colors.sequential.Viridis)
     def analyze_single_column(analyzer: DataAnalyzer, col: str) -> Tuple[str, go.Figure]:
         if not col or not analyzer: return "", go.Figure()
         series = analyzer.df[col]
+        stats_md = f"### 🔎 **Deep Dive: `{col}`**\n- **Data Type:** `{series.dtype}`\n- **Unique Values:** `{series.nunique()}`\n- **Missing:** `{series.isnull().sum()}` ({series.isnull().mean():.2%})\n"
         fig = go.Figure()
         if pd.api.types.is_numeric_dtype(series):
+            stats_md += f"- **Mean:** `{series.mean():.3f}` | **Std Dev:** `{series.std():.3f}`\n- **Median:** `{series.median():.3f}` | **Min:** `{series.min():.3f}` | **Max:** `{series.max():.3f}`\n"
             fig = create_histogram(analyzer, col)
         else:
             top_n = series.value_counts().nlargest(Config.TOP_N_CATEGORIES)
             stats_md += f"- **Top Value:** `{top_n.index[0]}` ({top_n.iloc[0]} occurrences)\n"
+            fig = px.bar(top_n, y=top_n.index, x=top_n.values, orientation='h', title=f"<b>Top {Config.TOP_N_CATEGORIES} Categories in `{col}`</b>", labels={'y': col, 'x': 'Count'}, template="plotly_white").update_yaxes(categoryorder="total ascending")
         return stats_md, fig
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"), title=Config.APP_TITLE) as demo:
         state_analyzer = gr.State()
         gr.Markdown(f"<h1>{Config.APP_TITLE}</h1>")
         gr.Markdown("Upload a CSV file, provide your Gemini API key, and receive an instant, AI-driven analysis of your data.")
         with gr.Row():
+            upload_button = gr.File(label="1. Upload CSV File", file_types=[".csv"], scale=3)
+            api_key_input = gr.Textbox(label="2. Enter Google Gemini API Key", type="password", scale=2)
+            analyze_button = gr.Button("✨ Generate Analysis", variant="primary", scale=1, min_width=150)
         with gr.Tabs():
             with gr.Tab("🤖 AI Narrative"):
+                ai_report_output = gr.Markdown("Your AI-generated report will appear here...")
                 download_report_button = gr.Button("⬇️ Download Full Report", visible=False)
             with gr.Tab("Profile"):
                 profile_missing_df = gr.DataFrame(interactive=False, label="Missing Values")
                 profile_numeric_df = gr.DataFrame(interactive=False, label="Numeric Stats")
                 profile_categorical_df = gr.DataFrame(interactive=False, label="Categorical Stats")
             with gr.Tab("📈 Overview Visuals"):
                 with gr.Row():
+                    plot_types, plot_missing = gr.Plot(), gr.Plot()
                 plot_correlation = gr.Plot()
             with gr.Tab("🎨 Interactive Explorer"):
                 with gr.Row(equal_height=False):
                     with gr.Column(scale=1):
                         dd_hist_col = gr.Dropdown(label="Select Column for Histogram", visible=False)
                     with gr.Column(scale=2):
                         plot_histogram = gr.Plot()
                 with gr.Row(equal_height=False):
                     with gr.Column(scale=1):
+                        dd_scatter_x, dd_scatter_y, dd_scatter_color = gr.Dropdown(label="X-Axis (Numeric)", visible=False), gr.Dropdown(label="Y-Axis (Numeric)", visible=False), gr.Dropdown(label="Color By (Optional)", visible=False)
                     with gr.Column(scale=2):
                         plot_scatter = gr.Plot()
             with gr.Tab("🔍 Column Deep-Dive"):
                 dd_drilldown_col = gr.Dropdown(label="Select Column to Analyze", visible=False)
                 with gr.Row():
+                    md_drilldown_stats, plot_drilldown = gr.Markdown(), gr.Plot()
+        gr.HTML("""<div style="text-align: center; margin-top: 20px; font-family: sans-serif; color: #777;"><p>💡 Need an API key? Get one from <a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>.</p><p>CognitiveEDA v3.2 | An MCP Expert System</p></div>""")
+        outputs_for_main_analysis = [state_analyzer, ai_report_output, download_report_button, profile_missing_df, profile_numeric_df, profile_categorical_df, plot_types, plot_missing, plot_correlation, dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color, dd_drilldown_col]
         analyze_button.click(fn=run_full_analysis, inputs=[upload_button, api_key_input], outputs=outputs_for_main_analysis)
         dd_hist_col.change(fn=create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=plot_histogram)
         scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
 # --- Main Application Logic ---
+### THIS IS THE CORRECTED FUNCTION ###
+def run_full_analysis(file_obj: gr.File, api_key: str) -> list:
+    """
+    Orchestrates the entire analysis pipeline upon button click.
+    Returns a list of values to update all relevant UI components.
+    """
     if file_obj is None:
         raise gr.Error("CRITICAL: No file uploaded. Please select a CSV file.")
     if not api_key:
         meta = analyzer.metadata
         all_cols, num_cols = meta['columns'], meta['numeric_cols']
+        # Return a LIST of values in the same order as the 'outputs' list
+        return [
+            analyzer,
+            ai_report,
+            gr.Button(visible=True),
+            missing_df,
+            num_df,
+            cat_df,
+            fig_types,
+            fig_missing,
+            fig_corr,
+            gr.Dropdown(choices=num_cols, label="Select Numeric Column", visible=True),
+            gr.Dropdown(choices=num_cols, label="X-Axis (Numeric)", visible=True),
+            gr.Dropdown(choices=num_cols, label="Y-Axis (Numeric)", visible=True),
+            gr.Dropdown(choices=all_cols, label="Color By (Optional)", visible=True),
+            gr.Dropdown(choices=all_cols, label="Select Column to Analyze", visible=True)
+        ]
     except Exception as e:
         logging.error(f"A critical error occurred during file processing: {e}", exc_info=True)
         raise gr.Error(f"Analysis Failed! The process stopped due to: {str(e)}")
+# (No changes to other functions)
 def download_report_file(analyzer: DataAnalyzer, ai_report_text: str) -> Optional[str]:
     if not analyzer:
         logging.warning("Download attempted without a valid analyzer object.")
         return None
     filename = f"CognitiveEDA_Report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
     meta = analyzer.metadata
+    full_report = f"# CognitiveEDA - Data Discovery Report\n**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n## Dataset Overview\n- **Shape:** {meta['shape'][0]} rows x {meta['shape'][1]} columns\n- **Memory Footprint:** {meta['memory_usage_mb']} MB\n- **Data Quality Score:** {meta['data_quality_score']}%\n\n---\n\n{ai_report_text}"
     with open(filename, "w", encoding="utf-8") as f:
         f.write(full_report)
     logging.info(f"Report file generated successfully: {filename}")
     return filename
 def perform_pre_flight_checks():
     logging.info("Performing pre-flight dependency checks...")
     required_packages = ["pandas", "gradio", "plotly", "google.generativeai", "tabulate"]
     missing_packages = [pkg for pkg in required_packages if importlib.util.find_spec(pkg) is None]
     if missing_packages:
         logging.critical(f"Missing critical packages: {', '.join(missing_packages)}")
+        print("\n" + "="*80 + "\nERROR: Your environment is missing critical dependencies.\n" + f"Missing package(s): {', '.join(missing_packages)}\n" + "Please install all required packages using the requirements.txt file:\n" + "pip install -r requirements.txt\n" + "="*80 + "\n")
         sys.exit(1)
     logging.info("All dependencies are satisfied. Proceeding with launch.")