# app.py
# -*- coding: utf-8 -*-
#
# PROJECT:      CognitiveEDA v5.9 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION:  Main application entry point. This version implements a fully
#               generic, data-agnostic stratification system, allowing users
#               to dynamically filter and analyze any high-cardinality feature.
#
# SETUP:        $ pip install -r requirements.txt
#
# AUTHOR:       An MCP & PhD Expert in Data & AI Solutions
# VERSION:      5.9 (Generic Stratification Engine)
# LAST-UPDATE:  2023-11-01 (Abstracted stratification controls)

import warnings
import logging

import gradio as gr

from ui import callbacks
from core.config import settings

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - [%(levelname)s] - (%(filename)s:%(lineno)d) - %(message)s'
)
warnings.filterwarnings('ignore', category=FutureWarning)


def main():
    """Build the Gradio UI, wire its callbacks, and launch the web server.

    The function lays out every component inside a single ``gr.Blocks``
    context, registers the event handlers from ``ui.callbacks`` and then
    calls ``demo.launch`` bound to ``0.0.0.0``.
    """
    logging.info("Starting %s", settings.APP_TITLE)

    # NOTE(review): the block nesting below was reconstructed from a source
    # whose line structure had been collapsed; confirm the layout (which
    # components share a Row/Column) against the rendered UI.
    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo"),
        title=settings.APP_TITLE,
    ) as demo:
        # 1. DEFINE THE UI LAYOUT
        state_analyzer = gr.State()

        # NOTE(review): only the title text survived in the source; any
        # heading/HTML markup that originally wrapped it should be restored
        # here if it existed.
        gr.Markdown(f"\n\n{settings.APP_TITLE}\n\n")

        with gr.Row():
            upload_button = gr.File(
                label="1. Upload Data File",
                file_types=[".csv", ".xlsx"],
                scale=3,
            )
            analyze_button = gr.Button(
                "✨ Generate Intelligence Report",
                variant="primary",
                scale=1,
            )

        with gr.Tabs():
            with gr.Tab("🤖 AI-Powered Strategy Report"):
                ai_report_output = gr.Markdown(
                    "### Your AI-generated report will appear here..."
                )

            with gr.Tab("📋 Data Profile"):
                profile_missing_df = gr.DataFrame()
                profile_numeric_df = gr.DataFrame()
                profile_categorical_df = gr.DataFrame()

            with gr.Tab("📊 Overview Visuals"):
                with gr.Row():
                    plot_types = gr.Plot()
                    plot_missing = gr.Plot()
                plot_correlation = gr.Plot()

            with gr.Tab("🎨 Interactive Explorer"):
                with gr.Row():
                    dd_hist_col = gr.Dropdown(
                        label="Select Column for Histogram", interactive=True
                    )
                    plot_histogram = gr.Plot()
                with gr.Row():
                    with gr.Column(scale=1):
                        dd_scatter_x = gr.Dropdown(label="X-Axis", interactive=True)
                        dd_scatter_y = gr.Dropdown(label="Y-Axis", interactive=True)
                        dd_scatter_color = gr.Dropdown(label="Color By", interactive=True)
                    with gr.Column(scale=2):
                        plot_scatter = gr.Plot()

            # --- GENERALIZED CLUSTERING TAB ---
            # Created hidden; it is one of the outputs of the report
            # callback below, which can therefore update it.
            with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
                gr.Markdown("### Stratified Analysis Control Panel")
                gr.Markdown(
                    "Use this to control for a dominant categorical variable. "
                    "First, select a feature to stratify by (e.g., 'Region', "
                    "'Product_Category'), then select a specific value to analyze."
                )
                with gr.Row():
                    dd_stratify_by_col = gr.Dropdown(
                        label="1. Stratify By Feature", interactive=True, scale=2
                    )
                    dd_stratify_by_value = gr.Dropdown(
                        label="2. Filter By Value", interactive=True, scale=2
                    )
                    num_clusters = gr.Slider(
                        minimum=2,
                        maximum=10,
                        value=4,
                        step=1,
                        label="Number of Clusters (K)",
                        interactive=True,
                        scale=1,
                    )

                gr.Markdown("---")
                gr.Markdown("## Segmentation Visualization & Profile")
                with gr.Row():
                    with gr.Column(scale=1):
                        md_cluster_summary = gr.Markdown(
                            "Methodology summary will appear here."
                        )
                        md_cluster_personas = gr.Markdown(
                            "Detailed cluster personas will appear here..."
                        )
                    with gr.Column(scale=2):
                        plot_cluster = gr.Plot(label="PCA Visualization")
                        plot_cluster_profile = gr.Plot(
                            label="Cluster Profile Visualization"
                        )

                gr.Markdown("---")
                gr.Markdown("## Optimal K Analysis")
                plot_elbow = gr.Plot(label="The Elbow Method")

            # Hidden placeholder tabs; both appear in main_outputs below.
            tab_timeseries = gr.Tab("⌛ Time-Series", visible=False)
            tab_text = gr.Tab("📝 Text", visible=False)

        # 2. DEFINE OUTPUT LISTS
        # Order matters: it must match the order of the values returned by
        # callbacks.generate_reports_and_visuals.
        main_outputs = [
            ai_report_output,
            profile_missing_df,
            profile_numeric_df,
            profile_categorical_df,
            plot_types,
            plot_missing,
            plot_correlation,
            dd_hist_col,
            dd_scatter_x,
            dd_scatter_y,
            dd_scatter_color,
            tab_timeseries,
            tab_text,
            tab_cluster,
            dd_stratify_by_col,
        ]

        # 3. REGISTER EVENT HANDLERS
        # Step one stores the analyzer in session state; step two is chained
        # with .then() and reads that state to fill the report widgets.
        analysis_complete_event = analyze_button.click(
            fn=callbacks.run_initial_analysis,
            inputs=[upload_button],
            outputs=[state_analyzer],
        )
        analysis_complete_event.then(
            fn=callbacks.generate_reports_and_visuals,
            inputs=[state_analyzer],
            outputs=main_outputs,
        )

        dd_hist_col.change(
            fn=callbacks.create_histogram,
            inputs=[state_analyzer, dd_hist_col],
            outputs=[plot_histogram],
        )

        # Any of the three scatter dropdowns redraws the same scatter plot.
        scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
        for dropdown in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
            dropdown.change(
                fn=callbacks.create_scatterplot,
                inputs=scatter_inputs,
                outputs=[plot_scatter],
            )

        # Chained callback for the stratification dropdowns: choosing a
        # feature updates the value dropdown.
        dd_stratify_by_col.change(
            fn=callbacks.update_filter_dropdown,
            inputs=[state_analyzer, dd_stratify_by_col],
            outputs=[dd_stratify_by_value],
        )

        # Clustering callback listens to changes on ALL THREE controls.
        cluster_inputs = [
            state_analyzer,
            dd_stratify_by_col,
            dd_stratify_by_value,
            num_clusters,
        ]
        cluster_outputs = [
            plot_cluster,
            plot_elbow,
            md_cluster_summary,
            md_cluster_personas,
            plot_cluster_profile,
        ]
        for control in [dd_stratify_by_col, dd_stratify_by_value, num_clusters]:
            control.change(
                fn=callbacks.update_stratified_clustering,
                inputs=cluster_inputs,
                outputs=cluster_outputs,
            )

    # server_name="0.0.0.0" makes the server listen on all network interfaces.
    demo.launch(debug=False, server_name="0.0.0.0")


if __name__ == "__main__":
    main()