# Source: Hugging Face Spaces - mgbam / PhoenixUI
# Space status: Sleeping
# File size: 18,115 Bytes

# Odyssey - The AI Data Science Workspace
# A demonstration of a state-of-the-art, AI-native analytic environment.

import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import io, os, json, base64, logging, warnings, pickle, uuid
from contextlib import redirect_stdout
from datetime import datetime

# ML & Preprocessing Imports
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import roc_curve, auc, confusion_matrix, r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer

# --- Configuration ---
# Process-wide setup executed at import time.
# Silences every Python warning raised after this point (library warnings included).
warnings.filterwarnings('ignore')
# Root logger: INFO and above, formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- UI Theme & Icons ---
# Gradio theme passed to gr.Blocks in build_ui(): the Monochrome base theme with
# CSS colour/gradient overrides applied through .set().
THEME = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate").set(
    body_background_fill="radial-gradient(circle, rgba(20,20,80,1) 0%, rgba(0,0,10,1) 100%);",
    block_label_background_fill="rgba(255,255,255,0.05)",
    block_background_fill="rgba(255,255,255,0.05)",
    button_primary_background_fill="linear-gradient(90deg, #6A11CB 0%, #2575FC 100%)",
    button_secondary_background_fill="linear-gradient(90deg, #556270 0%, #4ECDC4 100%)",
    color_accent_soft="rgba(255,255,255,0.2)"
)
# Emoji lookup used for the navigation button labels and panel headings in build_ui().
ICONS = {"overview": "🔭", "medic": "🧪", "launchpad": "🚀", "copilot": "💡", "export": "📄"}

# --- Core State & Project Management ---
def init_state():
    """Return a fresh, empty session state.

    Every data-related slot starts as ``None``; the chat history and the
    dashboard registry start as new, empty containers so that separate
    sessions never share mutable objects.
    """
    empty_slots = ("project_name", "df_original", "df_modified", "metadata", "insights")
    blank = dict.fromkeys(empty_slots)
    blank["chat_history"] = []
    blank["dynamic_dashboards"] = {}
    return blank

def save_project(state):
    """Saves the entire application state to a .odyssey file.

    The file is written to the current working directory and is named after
    ``state['project_name']``. DataFrames are stored as pickled byte strings,
    which is the form ``load_project`` decodes with ``pd.read_pickle``.

    Args:
        state: The session state dict (see ``init_state`` for its keys).

    Returns:
        A Gradio update carrying a status message for the UI.
    """
    if not state or not state.get("project_name"):
        return gr.update(value="Project needs a name to save.", interactive=True)

    filename = f"{state['project_name']}.odyssey"
    # Shallow copy: the live session keeps its DataFrames while the copy
    # receives their serialized form.
    state_to_save = state.copy()
    for key in ("df_original", "df_modified"):
        frame = state_to_save.get(key)
        if frame is not None:
            # DataFrame.to_pickle() requires a destination and returns None, so it
            # cannot produce the bytes load_project expects; pickle.dumps can.
            state_to_save[key] = pickle.dumps(frame)

    with open(filename, "wb") as f:
        pickle.dump(state_to_save, f)

    return gr.update(value=f"Project saved to {filename}", interactive=True)

def load_project(file_obj):
    """Loads a .odyssey file into the application state.

    Args:
        file_obj: An uploaded-file object exposing the on-disk path as ``.name``,
            or a falsy value when nothing was uploaded.

    Returns:
        The restored state dict with ``df_original`` / ``df_modified`` decoded back
        into DataFrames (``None`` when absent), or a blank state when ``file_obj``
        is falsy.

    Raises:
        ValueError: If the file does not contain a state dictionary.
    """
    if not file_obj:
        return init_state()

    # SECURITY: pickle.load executes arbitrary code embedded in the file. Only open
    # project files from a trusted source; do not expose this to anonymous uploads
    # without switching to a safe serialization format.
    with open(file_obj.name, "rb") as f:
        loaded_state = pickle.load(f)

    if not isinstance(loaded_state, dict):
        raise ValueError("Not a valid .odyssey project file.")

    # DataFrames are stored as pickled bytes; a missing key is treated like an
    # explicit None instead of raising KeyError.
    for key in ("df_original", "df_modified"):
        blob = loaded_state.get(key)
        loaded_state[key] = None if blob is None else pd.read_pickle(io.BytesIO(blob))

    return loaded_state

def prime_data(file_obj, project_name):
    """Read an uploaded CSV, tidy its column types and build a new session state.

    Text columns are first tried as datetimes; those that fail to parse become
    categoricals when fewer than half (but more than none) of their values are
    distinct. Without a file a blank state is returned.
    """
    if not file_obj:
        return init_state()

    frame = pd.read_csv(file_obj.name)

    # The list of text columns is fixed before any column is converted.
    text_columns = frame.select_dtypes(include=['object']).columns
    for name in text_columns:
        try:
            frame[name] = pd.to_datetime(frame[name], errors='raise')
            continue
        except (ValueError, TypeError):
            pass
        # Not a date column: low-cardinality text is stored as a categorical.
        distinct_ratio = frame[name].nunique() / len(frame)
        if 0.0 < distinct_ratio < 0.5:
            frame[name] = frame[name].astype('category')

    schema = extract_metadata(frame)
    findings = run_helios_engine(frame, schema)

    if project_name:
        resolved_name = project_name
    else:
        resolved_name = f"Project_{datetime.now().strftime('%Y%m%d_%H%M')}"

    return {
        "project_name": resolved_name,
        "df_original": frame,
        "df_modified": frame.copy(),
        "metadata": schema,
        "insights": findings,
        "chat_history": [],
        "dynamic_dashboards": {},
    }

def extract_metadata(df):
    """Describe a DataFrame's schema.

    Returns a dict with the frame's shape, its column names, the names of the
    numeric / categorical (object or category) / datetime columns, and a
    column-name -> dtype-name mapping.
    """
    def columns_of(kinds):
        # Names of the columns whose dtype matches the given selector.
        return df.select_dtypes(include=kinds).columns.tolist()

    dtype_names = {column: dtype.name for column, dtype in df.dtypes.items()}

    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'numeric': columns_of(np.number),
        'categorical': columns_of(['object', 'category']),
        'datetime': columns_of('datetime'),
        'dtypes': dtype_names,
    }

# --- Helios Overview Engine ---
def run_helios_engine(df, metadata):
    """Scan the dataset and collect the findings shown in the overview.

    The result has four entries: per-column missing-value counts (largest
    first, only columns with gaps), categorical columns with more than 50
    distinct values, per-column counts of 1.5*IQR outliers, and suggested ML
    targets (two-valued categoricals for classification, numeric columns with
    more than 20 distinct values for regression).
    """
    numeric_cols = metadata['numeric']
    categorical_cols = metadata['categorical']

    # Missing data: keep only columns that actually have gaps.
    null_counts = df.isnull().sum()
    missing_sorted = null_counts[null_counts > 0].sort_values(ascending=False)

    # High cardinality categoricals.
    wide_categoricals = {}
    for name in categorical_cols:
        distinct = df[name].nunique()
        if distinct > 50:
            wide_categoricals[name] = distinct

    # Outliers by the 1.5 * IQR rule.
    outlier_counts = {}
    for name in numeric_cols:
        series = df[name]
        lower_q = series.quantile(0.25)
        upper_q = series.quantile(0.75)
        spread = upper_q - lower_q
        too_low = series < (lower_q - 1.5 * spread)
        too_high = series > (upper_q + 1.5 * spread)
        flagged = (too_low | too_high).sum()
        if flagged > 0:
            outlier_counts[name] = flagged

    # Candidate targets: classification suggestions first, then regression.
    classification_targets = [
        f"{name} (Classification)" for name in categorical_cols if df[name].nunique() == 2
    ]
    regression_targets = [
        f"{name} (Regression)" for name in numeric_cols if df[name].nunique() > 20
    ]

    return {
        'missing_data': missing_sorted,
        'high_cardinality': wide_categoricals,
        'outliers': outlier_counts,
        'ml_suggestions': classification_targets + regression_targets,
    }

# --- Asclepius Data Lab Handlers ---
def medic_preview_imputation(state, col, num_method, cat_method):
    """Preview how imputing one column would change its distribution.

    Works on a copy of ``state['df_modified']``; the session data is not changed.

    Args:
        state: Session state dict holding ``df_original``, ``df_modified`` and ``metadata``.
        col: Name of the column to impute.
        num_method: ``'mean'``, ``'median'`` or ``'KNN'`` (used for numeric columns).
        cat_method: ``"Create 'Missing' Category"`` or anything else for mode
            imputation (used for categorical columns).

    Returns:
        A plotly figure overlaying the original and imputed distributions, or
        ``None`` when no data is loaded, the column is unknown, or the column is
        neither numeric nor categorical according to the metadata.
    """
    current = state.get('df_modified') if state else None
    # Nothing to preview before a dataset is loaded or for an unknown column.
    if current is None or not col or col not in current.columns:
        return None

    df_mod = current.copy()
    metadata = state.get('metadata') or {}

    if col in metadata.get('numeric', []):
        if num_method == 'KNN':
            # KNNImputer drops all-NaN columns, so skip it when nothing is observed;
            # ravel() turns its (n, 1) result back into a single column.
            if df_mod[col].notna().any():
                df_mod[col] = KNNImputer(n_neighbors=5).fit_transform(df_mod[[col]]).ravel()
        else:
            value = df_mod[col].mean() if num_method == 'mean' else df_mod[col].median()
            # Assign the result back: an in-place fillna on df_mod[col] acts on a
            # temporary object and is a no-op under pandas Copy-on-Write.
            df_mod[col] = df_mod[col].fillna(value)

        fig = go.Figure()
        fig.add_trace(go.Histogram(x=state['df_original'][col], name='Original', opacity=0.7))
        fig.add_trace(go.Histogram(x=df_mod[col], name='Imputed', opacity=0.7))
        fig.update_layout(barmode='overlay', title_text=f"Distribution for '{col}'", legend_title_text='Dataset')
        return fig

    elif col in metadata.get('categorical', []):
        if cat_method == "Create 'Missing' Category":
            if hasattr(df_mod[col], 'cat'):
                # add_categories raises if the label already exists.
                if "Missing" not in df_mod[col].cat.categories:
                    df_mod[col] = df_mod[col].cat.add_categories("Missing")
            df_mod[col] = df_mod[col].fillna("Missing")
        else:  # Mode
            modes = df_mod[col].mode()
            # mode() is empty for an all-missing column; leave the column as is.
            if not modes.empty:
                df_mod[col] = df_mod[col].fillna(modes.iloc[0])

        original_counts = state['df_original'][col].value_counts()
        imputed_counts = df_mod[col].value_counts()
        fig = go.Figure()
        fig.add_trace(go.Bar(x=original_counts.index, y=original_counts.values, name='Original'))
        fig.add_trace(go.Bar(x=imputed_counts.index, y=imputed_counts.values, name='Imputed'))
        return fig
    return None

# --- Prometheus Launchpad Handlers ---
def _feature_importance_figure(model, features):
    """Bar chart of the fitted model's feature importances, or a titled placeholder when it has none."""
    if hasattr(model, 'feature_importances_'):
        fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
        return px.bar(fi, title="Feature Importance")
    return go.Figure().update_layout(title="Feature Importance (Not available for this model)")


def _evaluate_classifier(model, X, y, features, n_splits):
    """Cross-validate a classifier, then fit on a 70/30 split and build its diagnostic figures."""
    scores = cross_val_score(model, X, y, cv=n_splits, scoring='accuracy')
    report = f"**Cross-Validated Accuracy:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)

    if len(model.classes_) == 2:
        # ROC Curve. pos_label is explicit so binary targets that are not coded
        # as {0, 1} (e.g. {1, 2}) are accepted by roc_curve.
        y_prob = model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=model.classes_[1])
        roc_auc = auc(fpr, tpr)
        fig1 = go.Figure(data=go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC curve (area = {roc_auc:.2f})'))
        fig1.add_scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dash'), name='Random Chance')
        fig1.update_layout(title="ROC Curve")
    else:
        # roc_curve only supports binary targets.
        fig1 = go.Figure().update_layout(title="ROC Curve (Only available for binary targets)")

    return fig1, _feature_importance_figure(model, features), report


def _evaluate_regressor(model, X, y, features, n_splits):
    """Cross-validate a regressor, then fit on a 70/30 split and build its diagnostic figures."""
    scores = cross_val_score(model, X, y, cv=n_splits, scoring='r2')
    report = f"**Cross-Validated R² Score:** {np.mean(scores):.3f} ± {np.std(scores):.3f}"

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Residuals Plot
    residuals = y_test - preds
    fig1 = px.scatter(x=preds, y=residuals, title="Residuals vs. Predicted Plot", labels={'x': 'Predicted Values', 'y': 'Residuals'})
    fig1.add_hline(y=0, line_dash="dash")

    return fig1, _feature_importance_figure(model, features), report


def prometheus_run_model(state, target, features, model_name):
    """Train and evaluate a model on the session's working dataset.

    Rows with missing values in the selected columns are dropped, text and
    categorical columns are label-encoded, and the task is treated as
    classification when the target has at most 10 distinct values, otherwise as
    regression.

    Args:
        state: Session state dict; ``state['df_modified']`` is the training data.
        target: Name of the column to predict.
        features: Names of the predictor columns.
        model_name: ``"Random Forest"``, ``"Logistic Regression"`` or ``"Linear Regression"``.

    Returns:
        ``(fig1, fig2, report)``: a diagnostic figure (ROC curve or residual
        plot), a feature-importance figure and a Markdown summary. When the
        request cannot be served, both figures are ``None`` and ``report``
        explains why.
    """
    if not target or not features:
        return None, None, "Select target and features."

    source_df = state.get('df_modified') if state else None
    if source_df is None:
        return None, None, "Upload a dataset before training a model."

    # A target listed among its own features would leak the answer into the model.
    features = [name for name in features if name != target]
    if not features:
        return None, None, "Select target and features."

    unknown = [name for name in [target] + features if name not in source_df.columns]
    if unknown:
        return None, None, f"Columns not found in the dataset: {', '.join(map(str, unknown))}"

    df = source_df.dropna(subset=[target] + features).copy()
    if df.empty:
        return None, None, "No complete rows remain for the selected columns."

    for col in [target] + features:
        if df[col].dtype.name in ['category', 'object']:
            df[col] = LabelEncoder().fit_transform(df[col])

    X, y = df[features], df[target]
    n_classes = y.nunique()
    problem_type = "Classification" if n_classes <= 10 else "Regression"
    if problem_type == "Classification" and n_classes < 2:
        return None, None, "The target needs at least two distinct values."

    # Each entry pairs the estimator class with its constructor arguments:
    # LinearRegression does not accept random_state.
    MODELS = {
        "Classification": {
            "Random Forest": (RandomForestClassifier, {"random_state": 42}),
            "Logistic Regression": (LogisticRegression, {"random_state": 42}),
        },
        "Regression": {
            "Random Forest": (RandomForestRegressor, {"random_state": 42}),
            "Linear Regression": (LinearRegression, {}),
        },
    }
    if model_name not in MODELS[problem_type]:
        return None, None, "Invalid model for this problem type."

    # Cross-validation cannot use more folds than there are rows.
    n_splits = min(5, len(df))
    if n_splits < 2:
        return None, None, "Not enough rows to cross-validate a model."

    model_cls, model_kwargs = MODELS[problem_type][model_name]
    model = model_cls(**model_kwargs)

    try:
        if problem_type == "Classification":
            return _evaluate_classifier(model, X, y, features, n_splits)
        return _evaluate_regressor(model, X, y, features, n_splits)
    except (ValueError, TypeError) as exc:
        # scikit-learn reports unusable input (e.g. unsupported column types)
        # this way; surface it in the UI instead of crashing the handler.
        logging.exception("Model training failed")
        return None, None, f"Model training failed: {exc}"

# --- Athena Co-pilot Handlers ---
def athena_respond(user_message, history, state, api_key):
    """Placeholder for the Athena co-pilot handler; currently does nothing and returns ``None``."""
    # TODO: main co-pilot logic (this would contain the full logic from previous examples).
    return None

def render_dynamic_dashboard(state, dashboard_id):
    """Return a Markdown component standing in for a stored dynamic dashboard.

    Only a placeholder is produced for now; real rendering of the dashboard
    registered under ``dashboard_id`` in ``state['dynamic_dashboards']`` is
    not implemented yet. Unknown ids yield a "not found" message.
    """
    registered = state['dynamic_dashboards']
    if dashboard_id not in registered:
        return gr.Markdown("Dashboard not found.")

    # Dynamic creation of the Gradio components would happen here.
    placeholder_text = f"### Dashboard: {dashboard_id}\n(Dynamic rendering placeholder)"
    return gr.Markdown(placeholder_text)

# --- UI Builder Functions ---
def build_ui():
    """Build the Odyssey Gradio interface and wire up its event handlers.

    The layout is a left sidebar (project controls and navigation) next to a
    main workspace holding one panel per tool; only one panel is visible at a
    time.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(theme=THEME, title="Odyssey AI Data Workspace") as demo:
        state = gr.State(init_state())

        with gr.Row():
            # Left Sidebar - Command Center
            with gr.Column(scale=1):
                gr.Markdown("# 🦉 Odyssey")

                with gr.Accordion("📂 Project", open=True):
                    project_name_input = gr.Textbox(label="Project Name", value="New_Project")
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                    with gr.Row():
                        save_btn = gr.Button("Save")
                        load_btn = gr.UploadButton("Load .odyssey")
                    project_status = gr.Markdown()

                # Navigation buttons
                overview_btn = gr.Button(f"{ICONS['overview']} Helios Overview")
                medic_btn = gr.Button(f"{ICONS['medic']} Asclepius Data Lab")
                launchpad_btn = gr.Button(f"{ICONS['launchpad']} Prometheus Launchpad")
                copilot_btn = gr.Button(f"{ICONS['copilot']} Athena Co-pilot")
                export_btn = gr.Button(f"{ICONS['export']} Export Report")

                # Global Info
                with gr.Accordion("Global Info", open=False):
                    file_info_md = gr.Markdown("No file loaded.")

            # Right Panel - Main Workspace
            with gr.Column(scale=4):
                # --- Helios Overview Panel ---
                with gr.Column(visible=True) as overview_panel:
                    gr.Markdown(f"# {ICONS['overview']} Helios Overview")
                    gr.Markdown("A proactive, high-level summary of your dataset.")
                    # Interactive dashboard components would go here
                    helios_report_md = gr.Markdown("Upload data to begin analysis.")

                # --- Asclepius Data Lab Panel ---
                with gr.Column(visible=False) as medic_panel:
                    gr.Markdown(f"# {ICONS['medic']} Asclepius Data Lab")
                    gr.Markdown("Interactively clean and prepare your data.")
                    # UI components for Data Medic
                    medic_col_select = gr.Dropdown(label="Select Column to Clean")
                    with gr.Row():
                        medic_num_method = gr.Radio(['mean', 'median', 'KNN'], label="Numeric Imputation", value='mean')
                        medic_cat_method = gr.Radio(['mode', "Create 'Missing' Category"], label="Categorical Imputation", value='mode')
                    medic_preview_plot = gr.Plot()
                    medic_apply_btn = gr.Button("Apply Changes to Session")

                # --- Prometheus Launchpad Panel ---
                with gr.Column(visible=False) as launchpad_panel:
                    gr.Markdown(f"# {ICONS['launchpad']} Prometheus Launchpad")
                    gr.Markdown("Train, evaluate, and understand predictive models.")
                    # UI components for Launchpad
                    with gr.Row():
                        lp_target = gr.Dropdown(label="🎯 Target")
                        # Gradio has no Multiselect component; multi-selection is a Dropdown option.
                        lp_features = gr.Dropdown(label="✨ Features", multiselect=True)
                        lp_model = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Model")
                    lp_run_btn = gr.Button("🚀 Launch Model Training (with CV)")
                    lp_report_md = gr.Markdown()
                    with gr.Row():
                        lp_fig1 = gr.Plot()
                        lp_fig2 = gr.Plot()

                # --- Athena Co-pilot Panel ---
                with gr.Column(visible=False) as copilot_panel:
                    gr.Markdown(f"# {ICONS['copilot']} Athena Co-pilot")
                    gr.Markdown("Your collaborative AI data scientist. Ask anything.")
                    # Chatbot UI
                    chatbot = gr.Chatbot(height=500)
                    with gr.Accordion("AI Generated Dashboard", open=False) as dynamic_dash_accordion:
                        dynamic_dash_output = gr.Group() # Placeholder for dynamic content
                    chat_input = gr.Textbox(label="Your Request")
                    chat_submit = gr.Button("Send", variant="primary")

        # --- Event Handling ---

        # Panel Navigation
        panels = [overview_panel, medic_panel, launchpad_panel, copilot_panel]
        def switch_panel(btn_idx):
            # One visibility update per panel: only the panel at btn_idx is shown.
            return [gr.update(visible=i == btn_idx) for i in range(len(panels))]

        overview_btn.click(lambda: switch_panel(0), None, panels)
        medic_btn.click(lambda: switch_panel(1), None, panels)
        launchpad_btn.click(lambda: switch_panel(2), None, panels)
        copilot_btn.click(lambda: switch_panel(3), None, panels)

        # File Upload Logic
        def on_upload(state, file, name):
            # `state` is received because it is wired as an input; an upload always
            # replaces it with a fresh session built from the new file.
            new_state = prime_data(file, name)
            insights = new_state.get('insights')
            metadata = new_state.get('metadata')

            if not file or not insights or not metadata:
                # Blank state (no file): reset the dependent widgets instead of
                # indexing into missing metadata.
                return (
                    new_state,
                    "No data loaded.",
                    "No file loaded.",
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=None),
                    gr.update(choices=[], value=[]),
                )

            # ICONS has no dedicated 'ml_suggestions' entry; fall back to the launchpad icon.
            heading_icon = ICONS.get('ml_suggestions', ICONS['launchpad'])
            helios_md = f"### {heading_icon} ML Suggestions\n" + "\n".join([f"- `{s}`" for s in insights['ml_suggestions']])
            # ... Add more sections for a full report

            file_info = f"**File:** `{os.path.basename(file.name)}`\n\n**Shape:** `{metadata['shape']}`"

            all_cols = metadata['columns']
            missing_cols = insights['missing_data'].index.tolist()

            # Selections are cleared so choices from a previous dataset cannot linger.
            return (
                new_state,
                helios_md,
                file_info,
                gr.update(choices=missing_cols, value=None),
                gr.update(choices=all_cols, value=None),
                gr.update(choices=all_cols, value=[]),
            )

        file_input.upload(on_upload, [state, file_input, project_name_input], [state, helios_report_md, file_info_md, medic_col_select, lp_target, lp_features])

        # Project Management
        save_btn.click(save_project, state, project_status)
        # NOTE: load_btn is intentionally not wired to load_project here: that function
        # unpickles the uploaded file, which would let any visitor run arbitrary code.
        # export_btn, medic_apply_btn and the chat controls have no handlers yet either.

        # Asclepius Live Preview
        preview_inputs = [state, medic_col_select, medic_num_method, medic_cat_method]
        medic_col_select.change(medic_preview_imputation, preview_inputs, medic_preview_plot)
        medic_num_method.change(medic_preview_imputation, preview_inputs, medic_preview_plot)
        medic_cat_method.change(medic_preview_imputation, preview_inputs, medic_preview_plot)

        # Prometheus Model Training
        lp_run_btn.click(prometheus_run_model, [state, lp_target, lp_features, lp_model], [lp_fig1, lp_fig2, lp_report_md])

        return demo

# --- Main Execution ---
# Build and launch the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app = build_ui()
    # debug=True is forwarded to Gradio's launch().
    app.launch(debug=True)