import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import json
import os
import logging
import tempfile
import warnings
from contextlib import redirect_stdout

import google.generativeai as genai
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# --- Configuration ---
warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

THEME = gr.themes.Glass(primary_hue="blue", secondary_hue="cyan").set(
    body_background_fill="rgba(0,0,0,0.8)",
    block_background_fill="rgba(0,0,0,0.6)",
    block_border_width="1px",
    border_color_primary="rgba(255,255,255,0.1)",
)

MODEL_REGISTRY = {
    "Classification": {"Random Forest": RandomForestClassifier, "Logistic Regression": LogisticRegression},
    "Regression": {"Random Forest": RandomForestRegressor, "Linear Regression": LinearRegression},
}


# --- Core Logic ---
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Safely execute a string of Python code and capture its output."""
    output_buffer = io.StringIO()
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, globals(), local_vars)
        stdout = output_buffer.getvalue()
        fig = local_vars.get('fig')
        df_out = local_vars.get('df_result')
        return stdout, fig, df_out, None
    except Exception as e:
        return None, None, None, f"Execution Error: {str(e)}"
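
# Illustrative use of safe_exec outside the UI (toy values, for reference only):
#
#     demo_df = pd.DataFrame({'age': [25, 32, 47]})
#     stdout, fig, df_res, err = safe_exec("print(df['age'].mean())", {'df': demo_df})
#     # stdout == '34.666666666666664\n'; fig, df_res and err are all None
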
def prime_data(file_obj):
    """Loads, analyzes, and primes the entire application state upon file upload."""
    if not file_obj:
        # Nothing uploaded yet: keep the main tabs hidden
        return {phoenix_tabs: gr.update(visible=False)}
    try:
        df = pd.read_csv(file_obj.name)

        # Smart type conversion: try datetimes first, then low-cardinality categories
        for col in df.select_dtypes(include=['object']).columns:
            try:
                df[col] = pd.to_datetime(df[col], errors='raise')
            except (ValueError, TypeError):
                if df[col].nunique() / len(df) < 0.5:  # not too many unique values
                    df[col] = df[col].astype('category')

        # --- Phoenix Eye: Proactive Insights Engine ---
        insights = {}
        metadata = extract_dataset_metadata(df)

        # 1. Missing data
        missing = df.isnull().sum()
        insights['missing'] = missing[missing > 0].sort_values(ascending=False)

        # 2. High cardinality
        insights['high_cardinality'] = {
            c: df[c].nunique() for c in metadata['categorical_cols'] if df[c].nunique() > 50
        }

        # 3. High correlations
        if len(metadata['numeric_cols']) > 1:
            corr = df[metadata['numeric_cols']].corr().abs()
            so = corr.unstack().sort_values(kind="quicksort", ascending=False)
            so = so[so < 1]  # remove self-correlation
            insights['high_correlations'] = so.head(5)

        # 4. Outlier detection (IQR method)
        outliers = {}
        for col in metadata['numeric_cols']:
            Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
            IQR = Q3 - Q1
            outlier_count = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
            if outlier_count > 0:
                outliers[col] = outlier_count
        insights['outliers'] = outliers

        # 5. ML target suggestions
        suggestions = []
        for col in metadata['categorical_cols']:
            if df[col].nunique() == 2:
                suggestions.append(f"{col} (Binary Classification)")
        for col in metadata['numeric_cols']:
            if df[col].nunique() > 20:  # heuristic for a continuous target
                suggestions.append(f"{col} (Regression)")
        insights['ml_suggestions'] = suggestions

        state = {
            'df_original': df,
            'df_modified': df.copy(),
            'filename': os.path.basename(file_obj.name),
            'metadata': metadata,
            'proactive_insights': insights,
        }

        # Generate UI updates
        overview_md = generate_phoenix_eye_markdown(state)
        all_cols = metadata['columns']

        return {
            global_state: state,
            phoenix_tabs: gr.update(visible=True),
            phoenix_eye_output: overview_md,
            # Data Medic updates
            medic_col_select: gr.update(choices=insights['missing'].index.tolist(), interactive=True),
            # Oracle updates
            oracle_target_select: gr.update(choices=all_cols, interactive=True),
            oracle_feature_select: gr.update(choices=all_cols, interactive=True),
        }
    except Exception as e:
        logging.error(f"Priming Error: {e}")
        return {phoenix_eye_output: gr.update(value=f"❌ **Error:** {e}")}


def extract_dataset_metadata(df):
    """Extracts typed metadata from a DataFrame."""
    rows, cols = df.shape
    return {
        'shape': (rows, cols),
        'columns': df.columns.tolist(),
        'numeric_cols': df.select_dtypes(include=np.number).columns.tolist(),
        'categorical_cols': df.select_dtypes(include=['object', 'category']).columns.tolist(),
        'datetime_cols': df.select_dtypes(include=['datetime64']).columns.tolist(),
        'dtypes': df.dtypes.apply(lambda x: x.name).to_dict(),
    }
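
# Illustrative output of extract_dataset_metadata for a toy frame (hypothetical
# input; the values shown are what pandas would report for it):
#
#     extract_dataset_metadata(pd.DataFrame({'age': [25, 32], 'city': ['NY', 'LA']}))
#     # -> {'shape': (2, 2), 'columns': ['age', 'city'],
#     #     'numeric_cols': ['age'], 'categorical_cols': ['city'],
#     #     'datetime_cols': [], 'dtypes': {'age': 'int64', 'city': 'object'}}
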
def generate_phoenix_eye_markdown(state):
    """Creates the markdown for the proactive insights dashboard."""
    insights = state['proactive_insights']
    md = f"## 🦅 Phoenix Eye: Proactive Insights for `{state['filename']}`\n"
    md += f"Dataset has **{state['metadata']['shape'][0]} rows** and **{state['metadata']['shape'][1]} columns**.\n\n"

    # ML suggestions
    md += "### 🔮 Potential ML Targets\n"
    if insights['ml_suggestions']:
        for s in insights['ml_suggestions']:
            md += f"- `{s}`\n"
    else:
        md += "No obvious ML target columns found.\n"
    md += "\n"

    # Missing data
    md += "### 💧 Missing Data\n"
    if not insights['missing'].empty:
        md += "Found missing values in these columns. Use the **Data Medic** tab to fix them.\n"
        md += insights['missing'].to_frame('Missing Count').to_markdown() + "\n"
    else:
        md += "✅ No missing data found!\n"
    md += "\n"

    # High correlation
    md += "### 🔗 Top Correlations\n"
    if 'high_correlations' in insights and not insights['high_correlations'].empty:
        md += insights['high_correlations'].to_frame('Correlation').to_markdown() + "\n"
    else:
        md += "No strong correlations found between numeric features.\n"
    md += "\n"

    # Outliers
    md += "### 📈 Outlier Alert\n"
    if insights['outliers']:
        for col, count in insights['outliers'].items():
            md += f"- `{col}` has **{count}** potential outliers.\n"
    else:
        md += "✅ No significant outliers detected.\n"
    md += "\n"

    # High cardinality
    md += "### 🗂️ High Cardinality Warning\n"
    if insights['high_cardinality']:
        for col, count in insights['high_cardinality'].items():
            md += f"- `{col}` has **{count}** unique values, which may be problematic for some models.\n"
    else:
        md += "✅ No high-cardinality categorical columns found.\n"
    md += "\n"

    return md


# --- Tab Handlers ---
def medic_preview_imputation(state, col, method):
    """Shows a before-and-after plot for data imputation."""
    if not col:
        return None
    df_orig = state['df_original']
    df_mod = df_orig.copy()
    if method == 'mean':
        value = df_mod[col].mean()
    elif method == 'median':
        value = df_mod[col].median()
    else:
        value = df_mod[col].mode()[0]
    df_mod[col] = df_mod[col].fillna(value)

    fig = go.Figure()
    fig.add_trace(go.Histogram(x=df_orig[col], name='Before', opacity=0.7))
    fig.add_trace(go.Histogram(x=df_mod[col], name='After', opacity=0.7))
    fig.update_layout(
        barmode='overlay',
        title=f"'{col}' Distribution: Before vs. After Imputation",
        legend_title_text='Dataset',
    )
    return fig


def medic_apply_imputation(state, col, method):
    """Applies imputation and updates the main state."""
    if not col:
        # Three outputs are wired to this handler, so pad with a no-op update
        return state, "No column selected.", gr.update()
    df_mod = state['df_modified'].copy()
    if method == 'mean':
        value = df_mod[col].mean()
    elif method == 'median':
        value = df_mod[col].median()
    else:
        value = df_mod[col].mode()[0]
    df_mod[col] = df_mod[col].fillna(value)
    state['df_modified'] = df_mod

    # Re-run the missing-data insight on the modified df
    missing = df_mod.isnull().sum()
    state['proactive_insights']['missing'] = missing[missing > 0]
    return (
        state,
        f"✅ Applied '{method}' imputation to '{col}'.",
        gr.update(choices=state['proactive_insights']['missing'].index.tolist()),
    )
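
# Worked example of the imputation choice above (illustrative values): for a
# column [1, 2, 100, NaN], 'mean' fills the gap with 34.33 while 'median'
# fills it with 2; the median is usually the safer default for skewed data.
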
def download_cleaned_data(state):
    """Saves the modified dataframe to a CSV and returns the path."""
    if state:
        df = state['df_modified']
        # gr.File needs a filepath, not raw CSV text, so write a temp file first
        path = os.path.join(tempfile.gettempdir(), f"cleaned_{state['filename']}")
        df.to_csv(path, index=False)
        return gr.update(value=path, visible=True)
    return gr.update(visible=False)


def oracle_run_model(state, target, features, model_name):
    """Trains a simple ML model and returns metrics and plots."""
    if not target or not features:
        return None, None, "Please select a target and at least one feature."

    df = state['df_modified'].copy()

    # Preprocessing
    df.dropna(subset=features + [target], inplace=True)
    if df.empty:
        return None, None, "Not enough data after dropping NA values."

    for col in features + [target]:
        if df[col].dtype == 'object' or df[col].dtype.name == 'category':
            df[col] = LabelEncoder().fit_transform(df[col])

    X = df[features]
    y = df[target]
    problem_type = "Classification" if y.nunique() <= 10 else "Regression"

    if model_name not in MODEL_REGISTRY[problem_type]:
        return None, None, f"Model {model_name} not suitable for {problem_type}."

    model_cls = MODEL_REGISTRY[problem_type][model_name]
    # LinearRegression has no random_state parameter; only seed models that accept one
    kwargs = {} if model_cls is LinearRegression else {'random_state': 42}
    model = model_cls(**kwargs)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Results
    if problem_type == "Classification":
        acc = accuracy_score(y_test, preds)
        cm = confusion_matrix(y_test, preds)
        cm_fig = px.imshow(cm, text_auto=True, title=f"Confusion Matrix (Accuracy: {acc:.2f})")
        report = f"**Classification Report:**\n- Accuracy: {acc:.2f}"
        if hasattr(model, 'feature_importances_'):
            fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
            fi_fig = px.bar(fi, title="Feature Importance")
            return fi_fig, cm_fig, report
        return None, cm_fig, report
    else:  # Regression
        r2 = r2_score(y_test, preds)
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        preds_fig = px.scatter(
            x=y_test,
            y=preds,
            labels={'x': 'Actual Values', 'y': 'Predicted Values'},
            title=f"Predictions vs. Actuals (R²: {r2:.2f})",
            trendline='ols',
        )
        report = f"**Regression Report:**\n- R² Score: {r2:.2f}\n- RMSE: {rmse:.2f}"
        if hasattr(model, 'feature_importances_'):
            fi = pd.Series(model.feature_importances_, index=features).sort_values(ascending=False)
            fi_fig = px.bar(fi, title="Feature Importance")
            return fi_fig, preds_fig, report
        return None, preds_fig, report
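
# Illustrative behaviour of the preprocessing above (toy values): LabelEncoder
# maps ['red', 'blue', 'red'] -> [1, 0, 1] (classes are sorted alphabetically),
# and a target with <= 10 unique values is treated as classification,
# anything else as regression.
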
def copilot_respond(user_message, history, state, api_key):
    """Handles the AI Co-pilot chat interaction."""
    if not api_key:
        # This function is a generator, so the early exit must yield, not return a value
        yield history + [(user_message, "I need a Gemini API key to function.")], None, None, ""
        return

    history += [(user_message, None)]
    prompt = f"""
You are 'Phoenix Co-pilot', a world-class AI data analyst. Your goal is to help the user by writing and executing Python code.
You have access to a pandas DataFrame named `df`. This is the user's LATEST data, including any cleaning they've performed.

**DataFrame Info:**
- Columns and dtypes: {json.dumps(state['metadata']['dtypes'])}

**Instructions:**
1. Analyze the user's request: '{user_message}'.
2. Formulate a plan (thought).
3. Write Python code to execute the plan.
4. Use `pandas`, `numpy`, and `plotly.express as px`.
5. To show a plot, assign it to a variable `fig`. Ex: `fig = px.histogram(df, x='age')`.
6. To show a dataframe, assign it to a variable `df_result`. Ex: `df_result = df.describe()`.
7. Use `print()` for text output.
8. **NEVER** modify `df` in place. Use `df.copy()` if needed.
9. Respond **ONLY** with a single, valid JSON object with keys "thought" and "code".

**User Request:** "{user_message}"
**Your JSON Response:**
"""
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)

        # Clean and parse the JSON reply
        response_json = json.loads(response.text.strip().replace("```json", "").replace("```", ""))
        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "print('No code generated.')")

        bot_thinking = f"🧠 **Thinking:** *{thought}*"
        history[-1] = (user_message, bot_thinking)
        yield history, None, None, gr.update(value=code_to_run)

        # Execute the generated code
        local_vars = {'df': state['df_modified'], 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, df_result, error = safe_exec(code_to_run, local_vars)

        bot_response = bot_thinking + "\n\n---\n\n"
        if error:
            bot_response += f"💥 **Execution Error:**\n```\n{error}\n```"
        if stdout:
            bot_response += f"📋 **Output:**\n```\n{stdout}\n```"
        if not error and not stdout and fig_result is None and not isinstance(df_result, pd.DataFrame):
            bot_response += "✅ Code executed, but produced no direct output."

        history[-1] = (user_message, bot_response)
        yield history, fig_result, df_result, gr.update(value=code_to_run)
    except Exception as e:
        error_msg = f"A critical error occurred: {e}. The AI may have returned invalid JSON. Check the generated code."
        history[-1] = (user_message, error_msg)
        yield history, None, None, ""
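
# Illustrative shape of the JSON reply the prompt above asks Gemini for
# (contents hypothetical):
#
#     {"thought": "The user wants the age distribution, so I'll plot a histogram.",
#      "code": "fig = px.histogram(df, x='age')"}
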
# --- Gradio UI Construction ---
with gr.Blocks(theme=THEME, title="Phoenix AI Data Explorer") as demo:
    global_state = gr.State({})

    gr.Markdown("# 🔥 Phoenix AI Data Explorer")
    gr.Markdown("The next-generation analytic tool. Upload your data to awaken the Phoenix.")

    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        api_key_input = gr.Textbox(label="🔑 Gemini API Key", type="password", placeholder="Enter Google AI Studio key...")

    with gr.Tabs(visible=False) as phoenix_tabs:
        with gr.Tab("🦅 Phoenix Eye"):
            phoenix_eye_output = gr.Markdown()

        with gr.Tab("🩺 Data Medic"):
            gr.Markdown("### Cleanse Your Data\nSelect a column with missing values and choose a method to fill them.")
            with gr.Row():
                medic_col_select = gr.Dropdown(label="Select Column to Clean")
                medic_method_select = gr.Radio(['mean', 'median', 'mode'], label="Imputation Method", value='mean')
            medic_preview_btn = gr.Button("📊 Preview Changes")
            medic_plot = gr.Plot()
            with gr.Row():
                medic_apply_btn = gr.Button("✅ Apply & Save Changes", variant="primary")
                medic_status = gr.Textbox(label="Status", interactive=False)
            with gr.Accordion("Download Cleaned Data", open=False):
                download_btn = gr.Button("⬇️ Download Cleaned CSV")
                download_file_output = gr.File(label="Download Link", visible=False)

        with gr.Tab("🔮 The Oracle (Predictive Modeling)"):
            gr.Markdown("### Glimpse the Future\nTrain a simple model to see the predictive power of your data.")
            with gr.Row():
                oracle_target_select = gr.Dropdown(label="🎯 Select Target Variable")
                oracle_feature_select = gr.Dropdown(label="✨ Select Features", multiselect=True)
                oracle_model_select = gr.Dropdown(choices=["Random Forest", "Logistic Regression", "Linear Regression"], label="🧠 Select Model")
            oracle_run_btn = gr.Button("🚀 Train Model!", variant="primary")
            oracle_status = gr.Markdown()
            with gr.Row():
                oracle_fig1 = gr.Plot()
                oracle_fig2 = gr.Plot()

        with gr.Tab("🤖 AI Co-pilot"):
            gr.Markdown("### Your Conversational Analyst\nAsk any question about your data in plain English.")
            copilot_chatbot = gr.Chatbot(label="Chat History", height=400)
            with gr.Accordion("AI Generated Results", open=True):
                copilot_fig_output = gr.Plot()
                copilot_df_output = gr.Dataframe(interactive=False)
            with gr.Accordion("Generated Code", open=False):
                copilot_code_output = gr.Code(language="python", interactive=False)
            with gr.Row():
                copilot_input = gr.Textbox(label="Your Question", placeholder="e.g., 'What's the correlation between age and salary?'", scale=4)
                copilot_submit_btn = gr.Button("Submit", variant="primary", scale=1)

    # --- Event Wiring ---
    file_input.upload(
        fn=prime_data,
        inputs=file_input,
        outputs=[global_state, phoenix_tabs, phoenix_eye_output, medic_col_select, oracle_target_select, oracle_feature_select],
        show_progress="full",
    )

    # Data Medic
    medic_preview_btn.click(medic_preview_imputation, [global_state, medic_col_select, medic_method_select], medic_plot)
    medic_apply_btn.click(medic_apply_imputation, [global_state, medic_col_select, medic_method_select], [global_state, medic_status, medic_col_select])
    download_btn.click(download_cleaned_data, [global_state], download_file_output)

    # Oracle
    oracle_run_btn.click(
        oracle_run_model,
        [global_state, oracle_target_select, oracle_feature_select, oracle_model_select],
        [oracle_fig1, oracle_fig2, oracle_status],
        show_progress="full",
    )

    # AI Co-pilot
    copilot_submit_btn.click(
        copilot_respond,
        [copilot_input, copilot_chatbot, global_state, api_key_input],
        [copilot_chatbot, copilot_fig_output, copilot_df_output, copilot_code_output],
    ).then(lambda: "", None, copilot_input)  # clear the input box after submit

if __name__ == "__main__":
    demo.launch(debug=True)
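
# To try this locally (assuming the script is saved as app.py; the packages
# below are the ones this file imports, plus tabulate, which
# DataFrame.to_markdown() requires):
#
#     pip install gradio pandas numpy plotly scikit-learn tabulate google-generativeai
#     python app.py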