Spaces:

mgbam
/

CognitiveEDA

Sleeping

App Files Files Community

mgbam committed 9 days ago

Commit

05e596d

verified ·

1 Parent(s): dc6813f

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -28

app.py CHANGED Viewed

@@ -2,16 +2,17 @@
 # -*- coding: utf-8 -*-
 #
-# PROJECT:      CognitiveEDA v5.7 - The QuantumLeap Intelligence Platform
 #
-# DESCRIPTION:  Main application entry point. This version adds UI components
-#               for cluster profiling to the Clustering tab.
 #
 # SETUP:        $ pip install -r requirements.txt
 #
 # AUTHOR:       An MCP & PhD Expert in Data & AI Solutions
-# VERSION:      5.7 (Cluster Profiling Edition)
-# LAST-UPDATE:  2023-10-31 (Integrated cluster persona analysis)
 import warnings
 import logging
@@ -38,7 +39,6 @@ def main():
             upload_button = gr.File(label="1. Upload Data File", file_types=[".csv", ".xlsx"], scale=3)
             analyze_button = gr.Button("✨ Generate Intelligence Report", variant="primary", scale=1)
         with gr.Tabs():
-            # ... (Other tabs remain the same)
             with gr.Tab("🤖 AI-Powered Strategy Report"):
                 ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")
             with gr.Tab("📋 Data Profile"):
@@ -52,24 +52,29 @@ def main():
                     plot_histogram = gr.Plot()
                 with gr.Row():
                     with gr.Column(scale=1):
-                        dd_scatter_x, dd_scatter_y, dd_scatter_color = gr.Dropdown(label="X-Axis", interactive=True), gr.Dropdown(label="Y-Axis", interactive=True), gr.Dropdown(label="Color By", interactive=True)
-                    with gr.Column(scale=2): plot_scatter = gr.Plot()
-            # --- MODIFIED CLUSTERING TAB ---
             with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        num_clusters = gr.Slider(minimum=2, maximum=10, value=5, step=1, label="Number of Clusters (K)", interactive=True)
-                        md_cluster_summary = gr.Markdown("Methodology summary will appear here.")
-                    with gr.Column(scale=2):
-                        plot_cluster = gr.Plot(label="PCA Visualization")
                 gr.Markdown("---")
-                gr.Markdown("## Cluster Profile Analysis")
                 with gr.Row():
                     with gr.Column(scale=1):
                         md_cluster_personas = gr.Markdown("Detailed cluster personas will appear here...")
                     with gr.Column(scale=2):
                         plot_cluster_profile = gr.Plot(label="Cluster Profile Visualization")
                 gr.Markdown("---")
@@ -78,14 +83,16 @@ def main():
             tab_timeseries, tab_text = gr.Tab("⌛ Time-Series", visible=False), gr.Tab("📝 Text", visible=False)
         main_outputs = [
             ai_report_output, profile_missing_df, profile_numeric_df, profile_categorical_df,
             plot_types, plot_missing, plot_correlation,
             dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color,
-            tab_timeseries, tab_text, tab_cluster
         ]
-        # 2. REGISTER EVENT HANDLERS
         analysis_complete_event = analyze_button.click(
             fn=callbacks.run_initial_analysis,
             inputs=[upload_button],
@@ -102,18 +109,23 @@ def main():
         for dropdown in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
             dropdown.change(fn=callbacks.create_scatterplot, inputs=scatter_inputs, outputs=[plot_scatter])
-        # --- MODIFIED CLUSTERING CALLBACK WIRING ---
-        num_clusters.change(
-            fn=callbacks.update_clustering,
-            inputs=[state_analyzer, num_clusters],
-            outputs=[
-                plot_cluster,
-                plot_elbow,
-                md_cluster_summary,
-                md_cluster_personas,
-                plot_cluster_profile
-            ]
         )
     demo.launch(debug=False, server_name="0.0.0.0")

 # -*- coding: utf-8 -*-
 #
+# PROJECT:      CognitiveEDA v5.9 - The QuantumLeap Intelligence Platform
 #
+# DESCRIPTION:  Main application entry point. This version implements a fully
+#               generic, data-agnostic stratification system, allowing users
+#               to dynamically filter and analyze any high-cardinality feature.
 #
 # SETUP:        $ pip install -r requirements.txt
 #
 # AUTHOR:       An MCP & PhD Expert in Data & AI Solutions
+# VERSION:      5.9 (Generic Stratification Engine)
+# LAST-UPDATE:  2023-11-01 (Abstracted stratification controls)
 import warnings
 import logging
             upload_button = gr.File(label="1. Upload Data File", file_types=[".csv", ".xlsx"], scale=3)
             analyze_button = gr.Button("✨ Generate Intelligence Report", variant="primary", scale=1)
         with gr.Tabs():
             with gr.Tab("🤖 AI-Powered Strategy Report"):
                 ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")
             with gr.Tab("📋 Data Profile"):
                     plot_histogram = gr.Plot()
                 with gr.Row():
                     with gr.Column(scale=1):
+                        dd_scatter_x = gr.Dropdown(label="X-Axis", interactive=True)
+                        dd_scatter_y = gr.Dropdown(label="Y-Axis", interactive=True)
+                        dd_scatter_color = gr.Dropdown(label="Color By", interactive=True)
+                    with gr.Column(scale=2):
+                        plot_scatter = gr.Plot()
+            # --- GENERALIZED CLUSTERING TAB ---
             with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
+                gr.Markdown("### Stratified Analysis Control Panel")
+                gr.Markdown("Use this to control for a dominant categorical variable. First, select a feature to stratify by (e.g., 'Region', 'Product_Category'), then select a specific value to analyze.")
                 with gr.Row():
+                    dd_stratify_by_col = gr.Dropdown(label="1. Stratify By Feature", interactive=True, scale=2)
+                    dd_stratify_by_value = gr.Dropdown(label="2. Filter By Value", interactive=True, scale=2)
+                    num_clusters = gr.Slider(minimum=2, maximum=10, value=4, step=1, label="Number of Clusters (K)", interactive=True, scale=1)
                 gr.Markdown("---")
+                gr.Markdown("## Segmentation Visualization & Profile")
                 with gr.Row():
                     with gr.Column(scale=1):
+                        md_cluster_summary = gr.Markdown("Methodology summary will appear here.")
                         md_cluster_personas = gr.Markdown("Detailed cluster personas will appear here...")
                     with gr.Column(scale=2):
+                        plot_cluster = gr.Plot(label="PCA Visualization")
                         plot_cluster_profile = gr.Plot(label="Cluster Profile Visualization")
                 gr.Markdown("---")
             tab_timeseries, tab_text = gr.Tab("⌛ Time-Series", visible=False), gr.Tab("📝 Text", visible=False)
+        # 2. DEFINE OUTPUT LISTS
         main_outputs = [
             ai_report_output, profile_missing_df, profile_numeric_df, profile_categorical_df,
             plot_types, plot_missing, plot_correlation,
             dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color,
+            tab_timeseries, tab_text, tab_cluster,
+            dd_stratify_by_col
         ]
+        # 3. REGISTER EVENT HANDLERS
         analysis_complete_event = analyze_button.click(
             fn=callbacks.run_initial_analysis,
             inputs=[upload_button],
         for dropdown in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
             dropdown.change(fn=callbacks.create_scatterplot, inputs=scatter_inputs, outputs=[plot_scatter])
+        # Chained callback for the stratification dropdowns
+        dd_stratify_by_col.change(
+            fn=callbacks.update_filter_dropdown,
+            inputs=[state_analyzer, dd_stratify_by_col],
+            outputs=[dd_stratify_by_value]
         )
+        # Clustering callback now listens to changes on ALL THREE controls
+        cluster_inputs = [state_analyzer, dd_stratify_by_col, dd_stratify_by_value, num_clusters]
+        cluster_outputs = [plot_cluster, plot_elbow, md_cluster_summary, md_cluster_personas, plot_cluster_profile]
+        for control in [dd_stratify_by_col, dd_stratify_by_value, num_clusters]:
+            control.change(
+                fn=callbacks.update_stratified_clustering,
+                inputs=cluster_inputs,
+                outputs=cluster_outputs
+            )
     demo.launch(debug=False, server_name="0.0.0.0")