# app.py
# -*- coding: utf-8 -*-
#
# PROJECT: CognitiveEDA v5.9 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION: Main application entry point. This version implements a fully
# generic, data-agnostic stratification system, allowing users
# to dynamically filter and analyze any high-cardinality feature.
#
# SETUP: $ pip install -r requirements.txt
#
# AUTHOR: An MCP & PhD Expert in Data & AI Solutions
# VERSION: 5.9 (Generic Stratification Engine)
# LAST-UPDATE: 2023-11-01 (Abstracted stratification controls)
import warnings
import logging
import gradio as gr
from ui import callbacks
from core.config import settings
# Root logger configuration: emit INFO and above, tagging every record with a
# timestamp, its level, and the source file/line that produced it.
_LOG_FORMAT = '%(asctime)s - [%(levelname)s] - (%(filename)s:%(lineno)d) - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Silence FutureWarning messages for the whole process.
warnings.filterwarnings(action='ignore', category=FutureWarning)
def main() -> None:
    """Build the Gradio interface, register its callbacks, and launch the server.

    The function performs three steps inside a single ``gr.Blocks`` context:

    1. Declares the layout: an upload/analyze row followed by tabs for the AI
       report, data profile, overview plots, interactive explorer, and the
       (initially hidden) clustering, time-series and text tabs.
    2. Collects the components that ``callbacks.generate_reports_and_visuals``
       updates into ``main_outputs``.
    3. Wires every control to its handler in ``ui.callbacks``.

    Finally it starts the Gradio server bound to ``0.0.0.0`` (all network
    interfaces) with debug mode disabled.
    """
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logging.info("Starting %s", settings.APP_TITLE)

    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo"),
        title=settings.APP_TITLE,
    ) as demo:
        # 1. DEFINE THE UI LAYOUT
        # Holds the value returned by callbacks.run_initial_analysis so that
        # the other handlers can receive it as an input.
        state_analyzer = gr.State()

        # Page title rendered as a top-level Markdown heading.
        gr.Markdown(f"# {settings.APP_TITLE}")

        with gr.Row():
            upload_button = gr.File(label="1. Upload Data File", file_types=[".csv", ".xlsx"], scale=3)
            analyze_button = gr.Button("✨ Generate Intelligence Report", variant="primary", scale=1)

        with gr.Tabs():
            with gr.Tab("🤖 AI-Powered Strategy Report"):
                ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")

            with gr.Tab("📋 Data Profile"):
                profile_missing_df = gr.DataFrame()
                profile_numeric_df = gr.DataFrame()
                profile_categorical_df = gr.DataFrame()

            with gr.Tab("📊 Overview Visuals"):
                with gr.Row():
                    plot_types = gr.Plot()
                    plot_missing = gr.Plot()
                plot_correlation = gr.Plot()

            with gr.Tab("🎨 Interactive Explorer"):
                with gr.Row():
                    dd_hist_col = gr.Dropdown(label="Select Column for Histogram", interactive=True)
                    plot_histogram = gr.Plot()
                with gr.Row():
                    with gr.Column(scale=1):
                        dd_scatter_x = gr.Dropdown(label="X-Axis", interactive=True)
                        dd_scatter_y = gr.Dropdown(label="Y-Axis", interactive=True)
                        dd_scatter_color = gr.Dropdown(label="Color By", interactive=True)
                    with gr.Column(scale=2):
                        plot_scatter = gr.Plot()

            # --- GENERALIZED CLUSTERING TAB ---
            # Hidden at start; it is listed in main_outputs so the report
            # callback can update it.
            with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
                gr.Markdown("### Stratified Analysis Control Panel")
                gr.Markdown("Use this to control for a dominant categorical variable. First, select a feature to stratify by (e.g., 'Region', 'Product_Category'), then select a specific value to analyze.")
                with gr.Row():
                    dd_stratify_by_col = gr.Dropdown(label="1. Stratify By Feature", interactive=True, scale=2)
                    dd_stratify_by_value = gr.Dropdown(label="2. Filter By Value", interactive=True, scale=2)
                    num_clusters = gr.Slider(minimum=2, maximum=10, value=4, step=1, label="Number of Clusters (K)", interactive=True, scale=1)
                gr.Markdown("---")
                gr.Markdown("## Segmentation Visualization & Profile")
                with gr.Row():
                    with gr.Column(scale=1):
                        md_cluster_summary = gr.Markdown("Methodology summary will appear here.")
                        md_cluster_personas = gr.Markdown("Detailed cluster personas will appear here...")
                    with gr.Column(scale=2):
                        plot_cluster = gr.Plot(label="PCA Visualization")
                        plot_cluster_profile = gr.Plot(label="Cluster Profile Visualization")
                gr.Markdown("---")
                gr.Markdown("## Optimal K Analysis")
                plot_elbow = gr.Plot(label="The Elbow Method")

            # Placeholder tabs, hidden at start and listed in main_outputs.
            tab_timeseries = gr.Tab("⌛ Time-Series", visible=False)
            tab_text = gr.Tab("📝 Text", visible=False)

        # 2. DEFINE OUTPUT LISTS
        # Gradio assigns a handler's return values to its outputs positionally,
        # so this order has to match what generate_reports_and_visuals returns.
        main_outputs = [
            ai_report_output, profile_missing_df, profile_numeric_df, profile_categorical_df,
            plot_types, plot_missing, plot_correlation,
            dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color,
            tab_timeseries, tab_text, tab_cluster,
            dd_stratify_by_col,
        ]

        # 3. REGISTER EVENT HANDLERS
        # Two-stage pipeline: the click stores the analysis result in
        # state_analyzer, then the chained step renders reports and visuals.
        analysis_complete_event = analyze_button.click(
            fn=callbacks.run_initial_analysis,
            inputs=[upload_button],
            outputs=[state_analyzer],
        )
        analysis_complete_event.then(
            fn=callbacks.generate_reports_and_visuals,
            inputs=[state_analyzer],
            outputs=main_outputs,
        )

        dd_hist_col.change(
            fn=callbacks.create_histogram,
            inputs=[state_analyzer, dd_hist_col],
            outputs=[plot_histogram],
        )

        # Any of the three scatter dropdowns redraws the same scatter plot.
        scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
        for dropdown in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
            dropdown.change(fn=callbacks.create_scatterplot, inputs=scatter_inputs, outputs=[plot_scatter])

        # Chained callback for the stratification dropdowns: choosing a feature
        # refreshes the "Filter By Value" dropdown.
        dd_stratify_by_col.change(
            fn=callbacks.update_filter_dropdown,
            inputs=[state_analyzer, dd_stratify_by_col],
            outputs=[dd_stratify_by_value],
        )

        # Clustering callback listens to changes on ALL THREE controls.
        # NOTE(review): dd_stratify_by_col therefore has two change handlers
        # (the filter refresh above and this one); the clustering run may see
        # the previous filter value - confirm the callback tolerates that.
        cluster_inputs = [state_analyzer, dd_stratify_by_col, dd_stratify_by_value, num_clusters]
        cluster_outputs = [plot_cluster, plot_elbow, md_cluster_summary, md_cluster_personas, plot_cluster_profile]
        for control in [dd_stratify_by_col, dd_stratify_by_value, num_clusters]:
            control.change(
                fn=callbacks.update_stratified_clustering,
                inputs=cluster_inputs,
                outputs=cluster_outputs,
            )

    # Bind to every interface so the app is reachable from outside the host.
    demo.launch(debug=False, server_name="0.0.0.0")
# Start the application only when this file is executed directly as a script.
if __name__ == "__main__":
    main()