File size: 6,439 Bytes
0a8cbc3
d9ea3f9
60da408
c9ba3ae
05e596d
c9ba3ae
05e596d
 
 
0a8cbc3
 
 
 
05e596d
 
60da408
0a8cbc3
 
51dfd28
0a8cbc3
 
51dfd28
0d6622c
0a8cbc3
 
 
 
 
60da408
0a8cbc3
 
f311ea6
0a8cbc3
 
16f99f2
f311ea6
 
 
16f99f2
f311ea6
 
16f99f2
 
 
 
 
 
f311ea6
16f99f2
 
 
 
 
 
05e596d
 
 
 
 
91f1cc5
05e596d
16f99f2
05e596d
 
f311ea6
05e596d
 
 
91f1cc5
 
05e596d
91f1cc5
 
05e596d
91f1cc5
 
05e596d
91f1cc5
 
 
 
 
 
16f99f2
 
05e596d
16f99f2
 
 
 
05e596d
 
16f99f2
91f1cc5
05e596d
16f99f2
0a8cbc3
16f99f2
 
0a8cbc3
 
 
16f99f2
91f1cc5
0a8cbc3
0d6622c
16f99f2
 
 
 
91f1cc5
05e596d
 
 
 
 
91f1cc5
05e596d
 
 
 
 
 
 
 
 
 
 
0a8cbc3
 
c9ba3ae
0a8cbc3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# app.py

# -*- coding: utf-8 -*-
#
# PROJECT:      CognitiveEDA v5.9 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION:  Main application entry point. This version implements a fully
#               generic, data-agnostic stratification system, allowing users
#               to dynamically filter and analyze any high-cardinality feature.
#
# SETUP:        $ pip install -r requirements.txt
#
# AUTHOR:       An MCP & PhD Expert in Data & AI Solutions
# VERSION:      5.9 (Generic Stratification Engine)
# LAST-UPDATE:  2023-11-01 (Abstracted stratification controls)

import warnings
import logging
import gradio as gr

from ui import callbacks
from core.config import settings

# Silence FutureWarning messages so they do not clutter the console output.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Root logger: INFO and above; every record carries a timestamp, the level,
# and the file/line that emitted it.
logging.basicConfig(
    format='%(asctime)s - [%(levelname)s] - (%(filename)s:%(lineno)d) - %(message)s',
    level=logging.INFO,
)

def main():
    """Build the Gradio interface, register its callbacks, and launch the app.

    The function proceeds in three steps:

    1. Lay out the UI: an upload/analyze row followed by a set of tabs
       (report, data profile, overview plots, interactive explorer,
       clustering, and two initially hidden placeholder tabs).
    2. Collect the components that are refreshed after an analysis run.
    3. Wire the controls to the functions in ``ui.callbacks``.

    Finally the app is served via ``demo.launch``.

    Returns:
        None
    """
    logging.info("Starting %s", settings.APP_TITLE)

    # NOTE: button/tab labels contain literal emoji, so this file must be
    # saved and read as UTF-8 (see the coding declaration at the top).
    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo"),
        title=settings.APP_TITLE,
    ) as demo:

        # 1. DEFINE THE UI LAYOUT
        # Holds the output of callbacks.run_initial_analysis and is passed as
        # the first input to every other callback below.
        state_analyzer = gr.State()
        gr.Markdown(f"<h1>{settings.APP_TITLE}</h1>")
        with gr.Row():
            upload_button = gr.File(label="1. Upload Data File", file_types=[".csv", ".xlsx"], scale=3)
            analyze_button = gr.Button("✨ Generate Intelligence Report", variant="primary", scale=1)
        with gr.Tabs():
            with gr.Tab("🤖 AI-Powered Strategy Report"):
                ai_report_output = gr.Markdown("### Your AI-generated report will appear here...")
            with gr.Tab("📋 Data Profile"):
                profile_missing_df = gr.DataFrame()
                profile_numeric_df = gr.DataFrame()
                profile_categorical_df = gr.DataFrame()
            with gr.Tab("📊 Overview Visuals"):
                with gr.Row():
                    plot_types = gr.Plot()
                    plot_missing = gr.Plot()
                plot_correlation = gr.Plot()
            with gr.Tab("🎨 Interactive Explorer"):
                with gr.Row():
                    dd_hist_col = gr.Dropdown(label="Select Column for Histogram", interactive=True)
                    plot_histogram = gr.Plot()
                with gr.Row():
                    with gr.Column(scale=1):
                        dd_scatter_x = gr.Dropdown(label="X-Axis", interactive=True)
                        dd_scatter_y = gr.Dropdown(label="Y-Axis", interactive=True)
                        dd_scatter_color = gr.Dropdown(label="Color By", interactive=True)
                    with gr.Column(scale=2):
                        plot_scatter = gr.Plot()

            # --- GENERALIZED CLUSTERING TAB ---
            # Created hidden; it is part of main_outputs, so
            # callbacks.generate_reports_and_visuals can update it
            # (presumably to reveal it when clustering applies - verify there).
            with gr.Tab("🧩 Clustering (K-Means)", visible=False) as tab_cluster:
                gr.Markdown("### Stratified Analysis Control Panel")
                gr.Markdown("Use this to control for a dominant categorical variable. First, select a feature to stratify by (e.g., 'Region', 'Product_Category'), then select a specific value to analyze.")
                with gr.Row():
                    dd_stratify_by_col = gr.Dropdown(label="1. Stratify By Feature", interactive=True, scale=2)
                    dd_stratify_by_value = gr.Dropdown(label="2. Filter By Value", interactive=True, scale=2)
                    num_clusters = gr.Slider(minimum=2, maximum=10, value=4, step=1, label="Number of Clusters (K)", interactive=True, scale=1)

                gr.Markdown("---")
                gr.Markdown("## Segmentation Visualization & Profile")
                with gr.Row():
                    with gr.Column(scale=1):
                        md_cluster_summary = gr.Markdown("Methodology summary will appear here.")
                        md_cluster_personas = gr.Markdown("Detailed cluster personas will appear here...")
                    with gr.Column(scale=2):
                        plot_cluster = gr.Plot(label="PCA Visualization")
                        plot_cluster_profile = gr.Plot(label="Cluster Profile Visualization")

                gr.Markdown("---")
                gr.Markdown("## Optimal K Analysis")
                plot_elbow = gr.Plot(label="The Elbow Method")

            # Empty, initially hidden tabs; like tab_cluster they are only
            # referenced again as entries of main_outputs.
            tab_timeseries = gr.Tab("⌛ Time-Series", visible=False)
            tab_text = gr.Tab("📝 Text", visible=False)

        # 2. DEFINE OUTPUT LISTS
        # Order matters: it must match the order of the values returned by
        # callbacks.generate_reports_and_visuals.
        main_outputs = [
            ai_report_output, profile_missing_df, profile_numeric_df, profile_categorical_df,
            plot_types, plot_missing, plot_correlation,
            dd_hist_col, dd_scatter_x, dd_scatter_y, dd_scatter_color,
            tab_timeseries, tab_text, tab_cluster,
            dd_stratify_by_col,
        ]

        # 3. REGISTER EVENT HANDLERS
        # Two-stage pipeline: the click stores the analysis result in
        # state_analyzer, then the follow-up step fills every main output.
        analysis_complete_event = analyze_button.click(
            fn=callbacks.run_initial_analysis,
            inputs=[upload_button],
            outputs=[state_analyzer],
        )
        analysis_complete_event.then(
            fn=callbacks.generate_reports_and_visuals,
            inputs=[state_analyzer],
            outputs=main_outputs,
        )

        dd_hist_col.change(fn=callbacks.create_histogram, inputs=[state_analyzer, dd_hist_col], outputs=[plot_histogram])

        # Any of the three scatter dropdowns redraws the plot using the
        # current values of all three.
        scatter_inputs = [state_analyzer, dd_scatter_x, dd_scatter_y, dd_scatter_color]
        for dropdown in [dd_scatter_x, dd_scatter_y, dd_scatter_color]:
            dropdown.change(fn=callbacks.create_scatterplot, inputs=scatter_inputs, outputs=[plot_scatter])

        # Chained callback for the stratification dropdowns: choosing a
        # feature refreshes the "Filter By Value" dropdown.
        dd_stratify_by_col.change(
            fn=callbacks.update_filter_dropdown,
            inputs=[state_analyzer, dd_stratify_by_col],
            outputs=[dd_stratify_by_value],
        )

        # Clustering callback listens to changes on ALL THREE controls
        cluster_inputs = [state_analyzer, dd_stratify_by_col, dd_stratify_by_value, num_clusters]
        cluster_outputs = [plot_cluster, plot_elbow, md_cluster_summary, md_cluster_personas, plot_cluster_profile]

        for control in [dd_stratify_by_col, dd_stratify_by_value, num_clusters]:
            control.change(
                fn=callbacks.update_stratified_clustering,
                inputs=cluster_inputs,
                outputs=cluster_outputs,
            )

    # server_name="0.0.0.0" makes the server listen on all network interfaces.
    demo.launch(debug=False, server_name="0.0.0.0")

# Start the application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()