import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import logging
import json
import os

# Module-level logger, named after this module via __name__; every function
# in this file reports progress and problems through it.
logger = logging.getLogger(__name__)

def _agent_hash_message_figure(message):
    """Return an otherwise empty figure that shows ``message`` as an annotation."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        x=0.5, y=0.5,
        showarrow=False, font=dict(size=20)
    )
    return fig


def _agent_hash_version_label(agent_hash):
    """Map an agent hash to its display label: a known version, or its last 6 characters."""
    # str() keeps non-string hashes (e.g. numeric ids) from crashing .endswith().
    hash_text = str(agent_hash)
    if hash_text.endswith("tby"):
        return "v0.4.1"
    if hash_text.endswith("vq"):
        return "v0.4.2"
    return f"Hash: {hash_text[-6:]}"


def _agent_hash_sort_key(agent_hash):
    """Sort key placing v0.4.1 hashes first, v0.4.2 second, then all others by hash text."""
    hash_text = str(agent_hash)
    if hash_text.endswith("tby"):
        return (0, "")
    if hash_text.endswith("vq"):
        return (1, "")
    # A numeric rank (instead of comparing "1"/"2" with the raw hash string)
    # guarantees unknown hashes can never sort ahead of the known versions.
    return (2, hash_text)


def create_apr_vs_agent_hash_graph(df):
    """
    Create a box plot showing APR values distribution for each agent hash version.

    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
    used. APR values above 200 or below -200 are logged and excluded (rows whose
    APR is NaN satisfy neither comparison and are dropped silently). One box is
    drawn per distinct agent hash; when both v0.4.1 and v0.4.2 are present, an
    annotation compares their median APR.

    Side effects: tries to write ``modius_apr_vs_agent_hash_graph.html`` and
    ``modius_apr_vs_agent_hash_graph.png`` to the current working directory.

    Args:
        df: DataFrame containing the APR data with agent_hash column. The
            ``metric_type`` and ``apr`` columns are required; ``agent_name`` is
            used for outlier logging and hover text when it is present.

    Returns:
        A Plotly figure object. When there is nothing to plot, a figure that
        only carries an explanatory annotation is returned instead. When the
        HTML export fails for a reason other than a filesystem error, a simple
        figure carrying the error text is returned.
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to plot APR vs agent hash graph")
        return _agent_hash_message_figure("No agent hash data available")

    # Guard the other columns the filter below relies on, so a malformed frame
    # produces a readable placeholder instead of a KeyError.
    missing_columns = [col for col in ('metric_type', 'apr') if col not in df.columns]
    if missing_columns:
        logger.error(f"Missing required column(s) for APR vs agent hash graph: {missing_columns}")
        return _agent_hash_message_figure("No valid APR data with agent_hash found")

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())].copy()

    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found")
        return _agent_hash_message_figure("No valid APR data with agent_hash found")

    has_agent_name = 'agent_name' in apr_data.columns

    # Split off outliers (APR values above 200 or below -200)
    apr_limit = 200
    is_outlier = (apr_data['apr'] > apr_limit) | (apr_data['apr'] < -apr_limit)
    in_range = (apr_data['apr'] <= apr_limit) & (apr_data['apr'] >= -apr_limit)
    outlier_data = apr_data[is_outlier]
    apr_data = apr_data[in_range].copy()

    # Log the outliers for better debugging
    if not outlier_data.empty:
        logger.info(
            f"Excluded {len(outlier_data)} data points with outlier APR values "
            f"(>{apr_limit} or <-{apr_limit})"
        )
        if has_agent_name:
            for agent_name, agent_outliers in outlier_data.groupby('agent_name'):
                logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
                for _, row in agent_outliers.iterrows():
                    logger.info(f"  - APR: {row['apr']}, timestamp: {row.get('timestamp')}, agent_hash: {row['agent_hash']}")

    if apr_data.empty:
        # Everything was an outlier (or NaN): there is no box to draw.
        logger.error("No APR data left to plot after excluding outliers")
        return _agent_hash_message_figure("No valid APR data with agent_hash found")

    # One box per hash, ordered v0.4.1, v0.4.2, then any other hashes
    sorted_hashes = sorted(apr_data['agent_hash'].unique(), key=_agent_hash_sort_key)
    num_boxes = len(sorted_hashes)

    # Boxes sit at x = 0..num_boxes-1; pad half a unit on both sides
    x_left, x_right = -0.5, num_boxes - 0.5

    # Create Plotly figure
    fig = go.Figure()

    # Zero line spanning the entire width
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=x_left, x1=x_right,
        layer="below"
    )

    # Background shading for the positive (blue) and negative (red) regions
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=100,  # Use a fixed positive value
        x0=x_left, x1=x_right,
        layer="below"
    )
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-100, y1=0,  # Use a fixed negative value
        x0=x_left, x1=x_right,
        layer="below"
    )

    # Colors for different versions
    version_colors = {
        "v0.4.1": "rgba(31, 119, 180, 0.7)",  # Blue
        "v0.4.2": "rgba(44, 160, 44, 0.7)",   # Green
    }

    # Default color for other hashes
    default_color = "rgba(214, 39, 40, 0.7)"  # Red

    # APR values pooled per version label, used for the version comparison below
    apr_by_version = {}

    # X-axis positions and labels
    x_positions = list(range(num_boxes))
    x_labels = []

    # Process each hash to create its box plot
    for position, agent_hash in enumerate(sorted_hashes):
        hash_data = apr_data[apr_data['agent_hash'] == agent_hash]

        # Agent name for this hash (expected to be the same for all its records)
        agent_name = hash_data['agent_name'].iloc[0] if has_agent_name else "Unknown"

        version = _agent_hash_version_label(agent_hash)
        color = version_colors.get(version, default_color)

        # Calculate statistics for this hash
        apr_values = hash_data['apr'].tolist()
        median_apr = hash_data['apr'].median()
        mean_apr = hash_data['apr'].mean()
        min_apr = hash_data['apr'].min()
        max_apr = hash_data['apr'].max()
        count = len(apr_values)

        apr_by_version.setdefault(version, []).extend(apr_values)

        # Tick label shows the version only (no hash)
        label = f"{version}"
        x_labels.append(label)

        # Create hover text with detailed statistics
        hover_text = (
            f"Version: {version}<br>"
            f"Agent: {agent_name}<br>"
            f"Hash: {agent_hash}<br>"
            f"Median APR: {median_apr:.2f}%<br>"
            f"Mean APR: {mean_apr:.2f}%<br>"
            f"Min APR: {min_apr:.2f}%<br>"
            f"Max APR: {max_apr:.2f}%<br>"
            f"Data points: {count}"
        )

        # Add box plot for this hash
        fig.add_trace(
            go.Box(
                y=apr_values,
                x=[position] * count,  # Position on x-axis
                name=label,
                boxpoints='outliers',  # Show only outlier points instead of all points
                jitter=0.1,  # Reduced jitter for less horizontal spread
                pointpos=0,  # Position of points relative to box
                marker=dict(
                    color=color,
                    size=6,  # Smaller point size
                    opacity=0.7,  # Add transparency
                    line=dict(width=1, color='black')
                ),
                line=dict(
                    color='black',
                    width=2  # Thicker line for better visibility
                ),
                fillcolor=color,
                hoverinfo='text',
                hovertext=hover_text,
                showlegend=False,
                boxmean=True,  # Show mean as a dashed line
                whiskerwidth=0.8,  # Slightly thinner whiskers
                width=0.6  # Wider boxes
            )
        )

        logger.info(f"Added box plot for agent hash {agent_hash} ({version}) with {count} points")

        # Add text annotation with median value above each box
        fig.add_annotation(
            x=position,
            y=median_apr + 5,  # Position above the box
            text=f"{median_apr:.1f}%",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=12,
                color="black",
                weight="bold"
            )
        )

    # Compare the pooled medians of the two known versions
    if "v0.4.1" in apr_by_version and "v0.4.2" in apr_by_version:
        v041_median = pd.Series(apr_by_version["v0.4.1"]).median()
        v042_median = pd.Series(apr_by_version["v0.4.2"]).median()

        improvement = v042_median - v041_median

        # Identical medians are neither an improvement nor a decrease
        if improvement > 0:
            change_text = "improvement"
        elif improvement < 0:
            change_text = "decrease"
        else:
            change_text = "change"

        fig.add_annotation(
            x=(num_boxes - 1) / 2,  # Center of the x-axis
            y=90,  # Top of the chart
            text=f"<b>Version Comparison:</b> {abs(improvement):.2f}% {change_text} from v0.4.1 to v0.4.2",
            showarrow=False,
            font=dict(
                family="Arial, sans-serif",
                size=16,
                color="black",
                weight="bold"
            ),
            bgcolor="rgba(255, 255, 255, 0.9)",
            bordercolor="black",
            borderwidth=2,
            borderpad=6,
            opacity=0.9
        )

    # Update layout with improved styling
    fig.update_layout(
        title=dict(
            text="Performance Graph",
            font=dict(
                family="Arial, sans-serif",
                size=24,  # Larger title
                color="black",
                weight="bold"
            ),
            x=0.5,  # Center the title
            y=0.95  # Position slightly higher
        ),
        xaxis_title=dict(
            text="Agent Version",
            font=dict(
                family="Arial, sans-serif",
                size=18,  # Larger axis title
                color="black",
                weight="bold"
            )
        ),
        yaxis_title=None,  # The y-axis title is drawn as an annotation below
        template="plotly_white",
        height=900,
        # No fixed width, so the figure stays responsive
        autosize=True,
        boxmode='group',  # Group boxes together
        margin=dict(r=50, l=120, t=100, b=100),
        hovermode="closest",
        plot_bgcolor='rgba(250,250,250,0.9)',  # Slightly off-white background
        paper_bgcolor='white',
        font=dict(
            family="Arial, sans-serif",
            size=14,
            color="black"
        ),
        showlegend=False
    )

    # Add single annotation acting as the y-axis title
    fig.add_annotation(
        x=-0.08,  # Far enough from the y-axis to avoid overlapping tick labels
        y=0,      # Anchored at y = 0 in data coordinates
        xref="paper",
        yref="y",
        text="Agent APR [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    # Update y-axis with autoscaling
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        autorange=True,  # Enable autoscaling
        tickformat=".2f",  # Format tick labels with 2 decimal places
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
        title=None  # Built-in axis title is replaced by the annotation above
    )

    # Update x-axis with custom labels
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        tickmode='array',
        tickvals=x_positions,
        ticktext=x_labels,
        tickangle=-45,  # Angle the labels for better readability
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
    )

    graph_file = "modius_apr_vs_agent_hash_graph.html"
    img_file = "modius_apr_vs_agent_hash_graph.png"

    try:
        # Save the figure
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
    except OSError as e:
        # A filesystem problem (read-only directory, full disk, ...) says nothing
        # about the figure itself, so still hand the valid figure to the caller.
        logger.error(f"Could not write APR vs agent hash graph to {graph_file}: {e}")
        return fig
    except Exception as e:
        # Any other failure means the figure could not be rendered/serialised;
        # fall back to a simple figure that reports the error.
        logger.error(f"Error creating APR vs agent hash graph: {e}")

        simple_fig = go.Figure()

        # Add zero line
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=-0.5, x1=1.5  # Fixed values for error case
        )

        # Add a note about the error
        simple_fig.add_annotation(
            text=f"Error creating graph: {str(e)}",
            x=0.5, y=0.5,
            showarrow=False,
            font=dict(size=15, color="red")
        )

        return simple_fig

    # Also save as image for compatibility; this is best effort because static
    # image export depends on an optional rendering backend.
    try:
        fig.write_image(img_file)
        logger.info(f"APR vs agent hash graph saved to {graph_file} and {img_file}")
    except Exception as e:
        logger.error(f"Error saving image: {e}")
        logger.info(f"APR vs agent hash graph saved to {graph_file} only")

    # Return the figure object for direct use in Gradio
    return fig

def save_apr_vs_agent_hash_to_csv(df, csv_file="modius_apr_vs_agent_hash.csv"):
    """
    Save the APR vs agent hash data to a CSV file.

    Only rows with ``metric_type == 'APR'`` and a non-null ``agent_hash`` are
    written; all columns of those rows are kept and the index is omitted.

    Args:
        df: DataFrame containing the APR data with agent_hash column
        csv_file: Destination path of the CSV file. Defaults to
            ``modius_apr_vs_agent_hash.csv`` in the current working directory.

    Returns:
        The path to the saved CSV file, or None if no data was saved (empty
        input, a missing ``agent_hash``/``metric_type`` column, or no
        matching rows).
    """
    if df.empty or 'agent_hash' not in df.columns:
        logger.error("No data or agent_hash column not found to save to CSV")
        return None

    # The row filter below needs metric_type too; report its absence instead
    # of letting the lookup raise KeyError.
    if 'metric_type' not in df.columns:
        logger.error("metric_type column not found; cannot select APR rows to save to CSV")
        return None

    # Filter for APR data only and ensure agent_hash is not null
    apr_data = df[(df['metric_type'] == 'APR') & (df['agent_hash'].notna())]

    if apr_data.empty:
        logger.error("No valid APR data with agent_hash found to save to CSV")
        return None

    apr_data.to_csv(csv_file, index=False)
    logger.info(f"APR vs agent hash data saved to {csv_file}")

    return csv_file

def generate_apr_vs_agent_hash_visualizations(df):
    """
    Generate APR vs agent hash visualizations.

    For usable input the data is first exported to CSV and then plotted. When
    the DataFrame is empty or has no ``agent_hash`` column, a placeholder
    figure with hidden axes and an explanatory message is returned instead and
    nothing is exported.

    Args:
        df: DataFrame containing the APR data

    Returns:
        A tuple containing the Plotly figure object and the path to the saved CSV file
        (the path is None whenever a placeholder figure is returned)
    """
    # Decide up front whether the input is unusable and, if so, which message to show.
    placeholder_text = None
    if df.empty:
        logger.info("No APR data available for agent hash visualization.")
        placeholder_text = "No APR data available for agent hash visualization"
    elif 'agent_hash' not in df.columns:
        logger.error("agent_hash column not found in DataFrame")
        placeholder_text = "agent_hash column not found in data"

    if placeholder_text is not None:
        # Message-only figure: grid, zero line and tick labels hidden on both axes.
        hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
        placeholder = go.Figure()
        placeholder.add_annotation(
            x=0.5, y=0.5,
            text=placeholder_text,
            font=dict(size=20),
            showarrow=False
        )
        placeholder.update_layout(xaxis=dict(hidden_axis), yaxis=dict(hidden_axis))
        return placeholder, None

    # Export first, then plot — same order as the data pipeline expects.
    csv_path = save_apr_vs_agent_hash_to_csv(df)
    figure = create_apr_vs_agent_hash_graph(df)

    return figure, csv_path