# --- Imports ---
import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os  # Still needed for API key potentially, but not model names
from pathlib import Path
import time
import plotly.express as px
import pandas as pd

# --- Configuration ---
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Keep this estimate
RESULTS_PAGE_SIZE = 25

# Analysis types the user can request: internal key -> display label.
AVAILABLE_ANALYSES = {
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}

# File extensions treated as source code when scanning an uploaded ZIP.
CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php',
    '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
}

# --- Session State Initialization ---
# Seed every key this app reads so downstream code can assume presence.
_SESSION_DEFAULTS = {
    'mock_api_call': False,        # use fake data instead of calling the API
    'analysis_results': None,      # parsed JSON insights from the last run
    'error_message': None,         # last error string, if any
    'analysis_requested': False,   # set once the user clicks Analyze
    'selected_model_name': None,   # will hold the "models/..." name
    'available_models_dict': {},   # mapping display_name -> name
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- Gemini API Setup & Model Discovery ---
model = None  # Global variable for the initialized model instance


@st.cache_data(ttl=3600)
def get_available_models():
    """Lists models supporting 'generateContent' using the API key."""
    compatible = {}
    try:
        if 'GEMINI_API_KEY' not in st.secrets:
            print("API key not found in secrets during model listing attempt.")
            return {}
        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
        print("Listing available models via API...")
        for candidate in genai.list_models():
            if 'generateContent' in candidate.supported_generation_methods:
                compatible[candidate.display_name] = candidate.name
        print(f"Found {len(compatible)} compatible models.")
        return compatible
    except Exception as e:
        st.error(f"🚨 Error listing available models: {e}")
        return {}


def initialize_gemini_model():
    """Initializes the Gemini model based on the selected name."""
    global model
    selected_name = st.session_state.get('selected_model_name')

    # Mock mode never touches the real API.
    if st.session_state.mock_api_call:
        return True

    # Reuse a live instance that already matches the selection; rebuild a
    # stale one by clearing it and recursing once.
    if model is not None:
        if model.model_name == selected_name:
            return True
        print("Model changed. Re-initializing...")
        model = None
        return initialize_gemini_model()

    # No instance yet: build one only if a model has actually been chosen.
    if selected_name:
        try:
            if 'GEMINI_API_KEY' not in st.secrets:
                st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
                st.stop()
            genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
            print(f"Initializing Gemini Model: {selected_name}")
            model = genai.GenerativeModel(model_name=selected_name)
            print(f"Gemini Model Initialized ({selected_name}).")
            return True
        except Exception as e:
            st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
            st.session_state.selected_model_name = None
            st.stop()
            return False

    return False
# --- Helper Functions ---
def estimate_token_count(text):
    """
    Estimate the token count for a prompt.

    Uses a rough 3-characters-per-token heuristic. Accepts either the text
    itself or a pre-computed character count.

    Args:
        text: A string to estimate, or an int total character count.

    Returns:
        int: Estimated number of tokens.
    """
    if isinstance(text, int):
        return text // 3
    return len(text) // 3


@st.cache_data(max_entries=5)
def process_zip_file_cached(file_id, file_size, file_content_bytes):
    """
    Process an uploaded ZIP archive and extract recognized code files.

    Entries with any hidden path part (starting with '.') or containing '__'
    are skipped silently; files whose suffix is not in CODE_EXTENSIONS are
    recorded in the ignored list. file_id/file_size exist purely to key the
    Streamlit cache.

    Returns:
        tuple: (code_files dict {path: text}, total_chars, file_count,
        ignored_files list of "name (reason)" strings), or (None, 0, 0, [])
        when the archive is invalid or unreadable.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    status_placeholder = st.empty()
    progress_bar = status_placeholder.progress(0)
    try:
        with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
            members = zip_ref.infolist()
            total_members = len(members)
            for i, member in enumerate(members):
                if i % 10 == 0:  # throttle progress-bar updates
                    progress_bar.progress(int((i / total_members) * 100))
                # NOTE(review): the '__' filter also drops legitimate files
                # such as __init__.py — confirm this is intentional.
                if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
                    continue
                file_path = Path(member.filename)
                if file_path.suffix.lower() in CODE_EXTENSIONS:
                    try:
                        with zip_ref.open(member) as file:
                            file_bytes = file.read()
                            # Prefer UTF-8, fall back to latin-1 before giving up.
                            try:
                                content = file_bytes.decode('utf-8')
                            except UnicodeDecodeError:
                                try:
                                    content = file_bytes.decode('latin-1')
                                except Exception as decode_err:
                                    ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                    continue
                            code_files[member.filename] = content
                            total_chars += len(content)
                            file_count += 1
                    except Exception as read_err:
                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                else:
                    # Hidden/'__' paths were already skipped above, so this
                    # condition always holds here; kept for safety.
                    if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
                        ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
            progress_bar.progress(100)
            status_placeholder.empty()
    except zipfile.BadZipFile:
        status_placeholder.empty()
        st.error("🚨 Invalid ZIP.")
        return None, 0, 0, []
    except Exception as e:
        status_placeholder.empty()
        st.error(f"🚨 ZIP Error: {e}")
        return None, 0, 0, []
    if file_count == 0:
        if not ignored_files:
            st.warning("No code files found.")
        else:
            st.warning("No code files found; some skipped.")
    return code_files, total_chars, file_count, ignored_files


def construct_analysis_prompt(code_files_dict, requested_analyses):
    """
    Build the analysis prompt: a header, per-file code segments kept within
    the token budget, and a description of the JSON structure the model must
    return for the requested analyses.

    Args:
        code_files_dict: Mapping of filename -> file content.
        requested_analyses: Iterable of analysis keys (see AVAILABLE_ANALYSES).

    Returns:
        tuple: (full prompt string or None, list of filenames included).
    """
    prompt_parts = ["Analyze the following codebase...\n\n"]
    current_token_estimate = estimate_token_count(prompt_parts[0])
    included_files = []
    code_segments = []
    prompt_status = st.empty()
    if len(code_files_dict) > 50:
        prompt_status.info("Constructing prompt...")
    for filename, content in code_files_dict.items():
        # BUG FIX: the START/END markers previously emitted the literal text
        # "(unknown)" instead of the actual filename (the loop's `filename`
        # was never interpolated), so the model could not attribute any
        # finding to a file. Use the real name in both markers.
        segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
        segment_token_estimate = estimate_token_count(segment)
        if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
            code_segments.append(segment)
            current_token_estimate += segment_token_estimate
            included_files.append(filename)
        else:
            # Stop at the first file that would blow the budget.
            st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
            break
    prompt_status.empty()
    if not included_files:
        st.error("🚨 No code files included in prompt.")
        return None, []
    prompt_parts.append("".join(code_segments))

    # Describe only the JSON keys matching the requested analyses.
    json_structure_description = "{\n"
    structure_parts = []
    if "generate_docs" in requested_analyses:
        structure_parts.append(' "documentation_suggestions": [...]')
    if "find_bugs" in requested_analyses:
        structure_parts.append(' "potential_bugs": [...]')
    if "check_style" in requested_analyses:
        structure_parts.append(' "style_issues": [...]')
    if "summarize_modules" in requested_analyses:
        structure_parts.append(' "module_summaries": [...]')
    if "suggest_refactoring" in requested_analyses:
        structure_parts.append(' "refactoring_suggestions": [...]')
    json_structure_description += ",\n".join(structure_parts) + "\n}"

    prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
    prompt_parts.append(prompt_footer)
    full_prompt = "".join(prompt_parts)
    return full_prompt, included_files
def call_gemini_api(prompt):
    """
    Send the prompt to the selected Gemini model and parse its JSON reply.

    In mock mode, returns a canned empty-result payload without any network
    access. Otherwise initializes the model (if needed), calls
    generate_content, strips markdown fences, and extracts the outermost
    JSON object from the response text.

    Args:
        prompt: Full prompt string (falsy aborts immediately).

    Returns:
        tuple: (insights dict or None, error message string or None). When
        the reply contains no recognizable JSON object, returns
        ({"raw_response": ...}, error) so callers can show the raw text.
    """
    if not prompt:
        return None, "Prompt generation failed."

    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(1)
        mock_json_response = json.dumps({
            "documentation_suggestions": [],
            "potential_bugs": [],
            "style_issues": [],
            "module_summaries": [],
            "refactoring_suggestions": []
        })
        st.success("Mock response generated.")
        return json.loads(mock_json_response), None

    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not selected or available."

    try:
        api_status = st.empty()
        api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
        start_time = time.time()
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                for c in [
                    "HARM_CATEGORY_HARASSMENT",
                    "HARM_CATEGORY_HATE_SPEECH",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "HARM_CATEGORY_DANGEROUS_CONTENT"
                ]
            ]
        )
        end_time = time.time()
        api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
        time.sleep(1)
        api_status.empty()
        try:
            # Strip common markdown fences, then slice out the outermost {...}.
            json_response_text = response.text.strip()
            if json_response_text.startswith("```json"):
                json_response_text = json_response_text[7:]
            if json_response_text.startswith("```"):
                json_response_text = json_response_text[3:]
            if json_response_text.endswith("```"):
                json_response_text = json_response_text[:-3]
            json_start = json_response_text.find('{')
            json_end = json_response_text.rfind('}') + 1
            # BUG FIX: the old guard also tested `json_end != -1`, which is
            # always true because rfind's -1 sentinel has already had 1 added;
            # the meaningful check is json_end > json_start.
            if json_start != -1 and json_end > json_start:
                final_json_text = json_response_text[json_start:json_end]
                insights = json.loads(final_json_text)
                return insights, None
            st.warning("⚠️ Could not find valid JSON object.")
            return {"raw_response": response.text}, "AI response did not contain clear JSON object."
        except json.JSONDecodeError as json_err:
            st.error(f"🚨 Error parsing JSON: {json_err}")
            st.code(response.text, language='text')
            return None, f"AI response not valid JSON: {json_err}"
        except AttributeError:
            # response lacked .text — e.g. blocked or empty candidates.
            st.error("🚨 Unexpected API response structure (AttributeError).")
            st.code(f"Response object: {response}", language='text')
            return None, "Unexpected response structure (AttributeError)."
        except Exception as e:
            st.error(f"🚨 Unexpected issue processing response: {e}")
            try:
                st.code(f"Response object: {response}", language='text')
            except Exception:
                pass
            return None, f"Unexpected response structure: {e}"
    except Exception as e:
        api_status.empty()
        st.error(f"🚨 API call error: {e}")
        # BUG FIX: classification previously required the exception to expose
        # a `.message` attribute, so most Python exception types always fell
        # through to the generic message. Matching on str(e) covers both the
        # google-api errors (whose str includes the message) and plain ones.
        error_msg = f"API call failed: {e}"
        err_text = str(e)
        if "429" in err_text:
            error_msg = "API Quota Exceeded or Rate Limit hit."
        elif "API key not valid" in err_text:
            error_msg = "Invalid Gemini API Key."
        elif "permission denied" in err_text.lower():
            error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
        elif "blocked" in err_text.lower() or "block_reason: SAFETY" in err_text:
            error_msg = "Content blocked due to safety settings."
        return None, error_msg


def display_results(results_json, requested_analyses):
    """
    Render the analysis report with per-category pagination and a JSON
    download button.

    Args:
        results_json: Parsed insights dict from call_gemini_api, or a dict
            holding "raw_response" when JSON parsing failed upstream.
        requested_analyses: Analysis keys the user selected; only their
            sections are rendered.
    """
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # Maps analysis key -> JSON result key, section title, and fields to show.
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}
        },
    }

    any_results_found = False
    for analysis_key in requested_analyses:
        if analysis_key not in display_config:
            continue
        config = display_config[analysis_key]
        items = results_json.get(config["key"], [])
        total_items = len(items)
        st.subheader(f"{config['title']} ({total_items} found)")
        if items:
            any_results_found = True
            # Per-section "show more" pagination state lives in session state.
            state_key = f"visible_{analysis_key}"
            if state_key not in st.session_state:
                st.session_state[state_key] = RESULTS_PAGE_SIZE
            visible_count = st.session_state[state_key]
            for item in items[:visible_count]:
                # File names are rendered as inline code; other fields plain.
                details = [
                    f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file'
                    else f"**{field_label}:** {item.get(field_key, 'N/A')}"
                    for field_key, field_label in config["fields"].items()
                    if item.get(field_key, 'N/A') != 'N/A'
                ]
                st.markdown("- " + " - ".join(details))
                if 'suggestion' in item:
                    st.code(item['suggestion'], language='text')
                elif 'description' in item:
                    st.markdown(f" > {item['description']}")
                elif 'summary' in item:
                    st.markdown(f" > {item['summary']}")
            if total_items > visible_count:
                if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                    st.session_state[state_key] += RESULTS_PAGE_SIZE
                    st.rerun()
        else:
            st.markdown("_No items found for this category._")
        st.divider()

    if not any_results_found:
        st.info("No specific findings were identified.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("🤖 Codebase Audit & Documentation Assistant")

# --- Sidebar with Enhancements ---
with st.sidebar:
    # Dark Mode Toggle
    dark_mode = st.checkbox("Enable Dark Mode", value=False)
    if dark_mode:
        # NOTE(review): the injected markdown block is empty, so this toggle
        # currently has no visual effect — confirm whether CSS was lost.
        st.markdown(""" """, unsafe_allow_html=True)

    st.header("⚙️ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode",
        value=st.session_state.mock_api_call,
        help="Use fake data instead of calling Gemini API."
    )
    st.divider()

    st.header("♊ Select Model")
    if not st.session_state.mock_api_call:
        st.session_state.available_models_dict = get_available_models()
        display_names = list(st.session_state.available_models_dict.keys())
        if display_names:
            # Recover the display name matching the previously selected model
            # so the selectbox keeps its position across reruns.
            current_display = None
            if st.session_state.selected_model_name:
                for disp, internal in st.session_state.available_models_dict.items():
                    if internal == st.session_state.selected_model_name:
                        current_display = disp
                        break
            try:
                default_index = display_names.index(current_display) if current_display in display_names else 0
            except ValueError:
                default_index = 0
            chosen_display = st.selectbox(
                "Choose Gemini model:",
                options=display_names,
                index=default_index,
                key="model_selector",
                help="Select the Gemini model to use for analysis."
            )
            st.session_state.selected_model_name = st.session_state.available_models_dict.get(chosen_display)
            st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
        elif 'GEMINI_API_KEY' in st.secrets:
            st.warning("No compatible models found or error listing models. Check API Key permissions.")
            st.session_state.selected_model_name = None
        else:
            st.warning("Add GEMINI_API_KEY to secrets to list models.")
            st.session_state.selected_model_name = None
    else:
        st.info("Mock API Mode ACTIVE")
        st.session_state.selected_model_name = "mock_model"
    st.divider()

    st.header("🔎 Select Analyses")
    selected_analyses = [
        key for key, name in AVAILABLE_ANALYSES.items()
        if st.checkbox(name, value=True, key=f"cb_{key}")
    ]
    st.divider()

    st.header("📄 How To Use")
    st.info(
        "1. Set API Key.\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select Model (if not Mock).\n"
        "4. Select analyses.\n"
        "5. Upload ZIP.\n"
        "6. Click 'Analyze'.\n"
        "7. Review report."
    )
    st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")

# Update title dynamically based on selected model
if st.session_state.selected_model_name and not st.session_state.mock_api_call:
    st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
elif st.session_state.mock_api_call:
    st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
else:
    st.markdown("Upload codebase (`.zip`) for analysis.")

# --- Main Content Area ---
uploaded_file = st.file_uploader(
    "📁 Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    # A fresh upload invalidates any previous run.
    on_change=lambda: st.session_state.update(
        analysis_results=None, error_message=None, analysis_requested=False
    )
)

analysis_button_placeholder = st.empty()
results_placeholder = st.container()

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    uploaded_file_bytes = uploaded_file.getvalue()
    file_id = f"{uploaded_file.name}-{uploaded_file.size}"
    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
        file_id, uploaded_file.size, uploaded_file_bytes
    )
    if code_files is not None:
        st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")

        # --- Interactive Metrics Visualization ---
        metrics = {
            "Metric": ["Files Analyzed", "Total Characters", "Token Estimate", "Ignored Files"],
            "Value": [file_count, total_chars, estimate_token_count(total_chars), len(ignored_files)]
        }
        df_metrics = pd.DataFrame(metrics)
        fig = px.bar(df_metrics, x="Metric", y="Value", title="Upload Summary Metrics")
        st.plotly_chart(fig)
        # --- End Metrics Visualization ---

        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
        analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
        analyze_button_label = "Analyze Codebase"
        if not model_ready:
            analyze_button_label = "Select Model First"
        elif analyze_button_disabled:
            analyze_button_label = "Select Analyses or Upload Valid Code"

        if analysis_button_placeholder.button(
            analyze_button_label, type="primary", disabled=analyze_button_disabled
        ):
            st.session_state.analysis_requested = True
            st.session_state.analysis_results = None
            st.session_state.error_message = None
            if not selected_analyses:
                st.warning("Please select analysis types.")
            elif file_count == 0:
                st.warning("No relevant code files found.")
            elif not model_ready:
                st.warning("Please select a Gemini model from the sidebar.")
            else:
                with results_placeholder:
                    spinner_model_name = (
                        st.session_state.selected_model_name
                        if not st.session_state.mock_api_call else "Mock Mode"
                    )
                    spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
                    with st.spinner(spinner_msg):
                        analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                        if analysis_prompt and included_files_in_prompt:
                            results_json, error_msg = call_gemini_api(analysis_prompt)
                            st.session_state.analysis_results = results_json
                            st.session_state.error_message = error_msg
                        elif not included_files_in_prompt:
                            st.session_state.error_message = "Could not proceed: No files included."
                        else:
                            st.session_state.error_message = "Failed to generate analysis prompt."
                    st.rerun()

if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
                st.subheader("Raw AI Response")
                st.code(st.session_state.analysis_results["raw_response"], language='text')
        elif st.session_state.analysis_results:
            display_results(st.session_state.analysis_results, selected_analyses)
        else:
            st.info("Analysis initiated, but no results/errors stored.")
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file to begin.")

results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")