import os
import re
import gradio as gr
import requests
import pandas as pd
from huggingface_hub import InferenceClient
from duckduckgo_search import DDGS
import wikipediaapi
from bs4 import BeautifulSoup
import pdfplumber

# ==== CONFIG ====
# Base URL of the scoring service; run_and_submit_all derives the
# "/questions" and "/submit" endpoints from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face API token read from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Models for general queries; llm_conversational tries them in list order and
# returns the first usable result.
CONVERSATIONAL_MODELS = [
    "deepseek-ai/DeepSeek-V2-Chat",
    "Qwen/Qwen2-72B-Instruct",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-70B-Instruct"
]
# Single model used by llm_coder for questions classified as coding-related.
CODING_MODEL = "deepseek-ai/DeepSeek-Coder-33B-Instruct"

# Shared English-language Wikipedia client used by wikipedia_search.
wiki_api = wikipediaapi.Wikipedia(language="en", user_agent="SmartAgent/1.0 (chockqoteewy@gmail.com)")

# ==== UTILITIES ====
def extract_links(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    A URL runs from ``http://`` or ``https://`` up to the next whitespace,
    ``)``, ``]`` or ``,``. Sentence punctuation that ends up glued to the end
    of a match (``.``, ``;``, ``:``, ``!``, ``?`` and quotes) is stripped, so
    "see https://example.com/a.pdf." yields a URL that can actually be
    fetched and whose extension can be recognised.

    Args:
        text: Free-form text to scan; ``None`` or an empty string is allowed.

    Returns:
        A list of URL strings (empty when nothing matches).
    """
    url_pattern = re.compile(r'(https?://[^\s\)\],]+)')
    candidates = url_pattern.findall(text or "")
    # Trailing punctuation belongs to the surrounding sentence, not the URL.
    return [url.rstrip(".;:!?'\"") for url in candidates]

def download_file(url, out_dir="tmp_files"):
    """Download *url* into *out_dir* and return the local file path.

    The local filename is the last segment of the URL's *path* component, so
    query strings and fragments never leak into it (a query value containing
    "/" previously hijacked the filename). When the path has no usable last
    segment (e.g. the URL ends with "/"), a generic name is used instead.

    Args:
        url: Absolute http(s) URL of the file to fetch.
        out_dir: Directory to store the file in; created if missing.

    Returns:
        The path of the written file, or ``None`` if anything went wrong
        (bad URL, network/HTTP error, or a filesystem error).
    """
    # Local import keeps the function self-contained.
    from urllib.parse import unquote, urlsplit

    try:
        # basename() of the decoded path also neutralises "%2F"-encoded
        # separators, so the result is always a single path component.
        filename = os.path.basename(unquote(urlsplit(url).path))
        if filename in ("", ".", ".."):
            filename = "downloaded_file"
        local_path = os.path.join(out_dir, filename)

        os.makedirs(out_dir, exist_ok=True)
        r = requests.get(url, timeout=20)
        r.raise_for_status()
        with open(local_path, "wb") as f:
            f.write(r.content)
        return local_path
    except Exception:
        # Deliberately best-effort: callers treat None as "could not download".
        return None

def analyze_file(file_path):
    """Return a short, human-readable preview of a local file.

    The file type is chosen from the extension, compared case-insensitively
    so that names such as ``REPORT.PDF`` are recognised:

    * ``.xlsx`` / ``.xls`` / ``.csv`` -- first rows rendered as a Markdown table.
    * ``.pdf`` -- up to 1000 characters of text from the first page.
    * ``.txt`` -- the first 1000 characters, decoded as UTF-8.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A preview string, ``"Unsupported file type: ..."`` for other
        extensions, or ``"File analysis error: ..."`` if reading fails.
        This function never raises.
    """
    try:
        lowered = file_path.lower()
        if lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(file_path)
            return f"Excel summary: {df.head().to_markdown(index=False)}"
        if lowered.endswith(".csv"):
            df = pd.read_csv(file_path)
            return f"CSV summary: {df.head().to_markdown(index=False)}"
        if lowered.endswith(".pdf"):
            with pdfplumber.open(file_path) as pdf:
                if not pdf.pages:
                    return "File analysis error: PDF has no pages"
                # A page without a text layer may yield None; treat as empty.
                first_page = pdf.pages[0].extract_text() or ""
            return f"PDF text sample: {first_page[:1000]}"
        if lowered.endswith(".txt"):
            # Read only the sample that is shown; undecodable bytes are
            # replaced rather than failing the whole preview.
            with open(file_path, encoding='utf-8', errors='replace') as f:
                txt = f.read(1000)
            return f"TXT file sample: {txt}"
        return f"Unsupported file type: {file_path}"
    except Exception as e:
        return f"File analysis error: {e}"

def analyze_webpage(url):
    """Fetch *url* and return its title plus a sample of its paragraph text.

    HTTP error responses (4xx/5xx) are reported as errors instead of being
    summarised as if they were the requested content, matching how
    download_file treats failed requests.

    Args:
        url: Absolute http(s) URL of the page.

    Returns:
        ``"Webpage Title: ...\\nContent sample:\\n..."`` built from the
        ``<title>`` and the first five ``<p>`` elements (sample capped at
        1200 characters), or ``"Webpage error: ..."`` on any failure.
        This function never raises.
    """
    try:
        r = requests.get(url, timeout=15)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")
        title_tag = soup.title
        # .string is None for an empty <title> or one with nested markup;
        # fall back instead of printing "None".
        title = title_tag.string if title_tag and title_tag.string else "No title"
        paragraphs = [p.get_text() for p in soup.find_all("p")]
        article_sample = "\n".join(paragraphs[:5])
        return f"Webpage Title: {title}\nContent sample:\n{article_sample[:1200]}"
    except Exception as e:
        return f"Webpage error: {e}"

def duckduckgo_search(query):
    """Return the text snippets of the top DuckDuckGo hits for *query*.

    Up to three results are requested; the non-empty ``body`` fields are
    joined with newlines.

    Returns:
        The joined snippets, or ``None`` when there are none or when the
        search raises for any reason.
    """
    try:
        with DDGS() as search_client:
            hits = list(search_client.text(query, max_results=3))
            snippets = [hit["body"] for hit in hits if hit.get("body")]
            if not snippets:
                return None
            return "\n".join(snippets)
    except Exception:
        return None

def wikipedia_search(query):
    """Return the summary of the Wikipedia page titled *query*.

    Returns:
        The page summary, or ``None`` when the page does not exist, has an
        empty summary, or the lookup raises.
    """
    try:
        article = wiki_api.page(query)
        if not article.exists():
            return None
        if not article.summary:
            return None
        return article.summary
    except Exception:
        return None

def is_coding_question(text):
    """Heuristically decide whether *text* is a programming question.

    The text counts as a coding question when its lower-cased form contains
    any of a fixed set of keywords as a plain substring, or when it contains
    a non-empty span enclosed in triple backticks.

    Args:
        text: The question text; ``None`` is treated as an empty string.

    Returns:
        ``True`` for a coding question, ``False`` otherwise.
    """
    lowered = (text or "").lower()
    keywords = (
        "python", "java", "c++", "code", "function", "write a", "script", "algorithm",
        "bug", "traceback", "error", "output", "compile", "debug",
    )
    for keyword in keywords:
        if keyword in lowered:
            return True
    # DOTALL lets a fenced block span several lines.
    fenced = re.search(r"```.+```", text or "", re.DOTALL)
    return fenced is not None

def llm_coder(query):
    """Ask the coding model to answer *query*.

    Returns:
        ``"[<model id>] <generated text>"`` on success,
        ``"Unknown result format from coder model."`` when the client returns
        neither a string nor a dict with a ``generated_text`` key, or
        ``"Coder Model Error: <exception>"`` when the call raises. The result
        is therefore always a non-empty string.
    """
    try:
        client = InferenceClient(CODING_MODEL, token=HF_TOKEN)
        generated = client.text_generation(query, max_new_tokens=1024)
        if isinstance(generated, dict) and "generated_text" in generated:
            text = generated["generated_text"]
        elif isinstance(generated, str):
            text = generated
        else:
            return "Unknown result format from coder model."
        return f"[{CODING_MODEL}] {text}"
    except Exception as exc:
        return f"Coder Model Error: {exc}"

def llm_conversational(query):
    """Answer *query* with the first conversational model that responds.

    The models in CONVERSATIONAL_MODELS are tried in order. A model that
    raises, or that returns neither a string nor a dict with a
    ``generated_text`` key, is skipped.

    Returns:
        ``"[<model id>] <generated text>"`` from the first usable model, or
        ``"LLM Error (all advanced models): <detail>"`` where the detail is
        the most recent exception (or ``Unknown error`` if none was raised).
    """
    failure = None
    for candidate in CONVERSATIONAL_MODELS:
        try:
            client = InferenceClient(candidate, token=HF_TOKEN)
            reply = client.text_generation(query, max_new_tokens=512)
            if isinstance(reply, str):
                return f"[{candidate}] {reply}"
            if isinstance(reply, dict) and "generated_text" in reply:
                return f"[{candidate}] {reply['generated_text']}"
        except Exception as exc:
            # Remember only the latest failure; earlier ones are overwritten.
            failure = f"{candidate}: {exc}"
    detail = failure if failure else 'Unknown error'
    return f"LLM Error (all advanced models): {detail}"

# ==== SMART AGENT ====
class SmartAgent:
    """Callable agent that routes a question to the first tool that answers.

    Order of attempts:

    1. If the question contains links, describe each one (file preview or
       web page summary) and return that.
    2. If it looks like a coding question, ask the coder model.
    3. DuckDuckGo text search.
    4. Wikipedia page summary.
    5. Conversational LLMs.
    """

    # Extensions (lower-case) that are downloaded and passed to analyze_file.
    _FILE_SUFFIXES = (".xlsx", ".xls", ".csv", ".pdf", ".txt")
    # Prefixes of the strings llm_coder returns when it could not produce an
    # answer; such results must not be reported as the answer.
    _CODER_FAILURE_PREFIXES = (
        "Coder Model Error:",
        "Unknown result format from coder model.",
    )

    def __init__(self):
        pass

    @classmethod
    def _is_file_url(cls, url):
        """Return True when the URL's path ends in a supported file extension."""
        from urllib.parse import urlsplit

        try:
            path = urlsplit(url).path
        except ValueError:
            # Malformed URL: fall back to checking the raw string.
            path = url
        # Only the path is inspected, so ".csv" in a host name or query
        # string no longer sends a normal web page down the file route.
        return path.lower().endswith(cls._FILE_SUFFIXES)

    def _describe_link(self, url):
        """Return a file preview or web page summary for a single URL."""
        if not self._is_file_url(url):
            return analyze_webpage(url)
        local = download_file(url)
        if not local:
            return f"Could not download file: {url}"
        return f"File ({url}):\n{analyze_file(local)}"

    def __call__(self, question: str) -> str:
        """Answer *question* using the first tool that yields a result."""
        # 1. Handle file/link
        links = extract_links(question)
        if links:
            return "\n\n".join(self._describe_link(url) for url in links)

        # 2. Code/coding questions: use coder model. llm_coder reports
        #    failures as ordinary strings, so filter those out explicitly;
        #    otherwise an error message would be returned as the answer and
        #    the fallbacks below would never run.
        if is_coding_question(question):
            result = llm_coder(question)
            if result and not result.startswith(self._CODER_FAILURE_PREFIXES):
                return result

        # 3. DuckDuckGo for fresh web results
        result = duckduckgo_search(question)
        if result:
            return result

        # 4. Wikipedia for encyclopedic facts
        result = wikipedia_search(question)
        if result:
            return result

        # 5. General QA, reasoning, or fallback: conversational SOTA models
        result = llm_conversational(question)
        if result:
            return result

        return "No answer could be found by available models."

# ==== SUBMISSION LOGIC ====
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the agent on each, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or
            ``None`` when nobody is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame
        with one row per processed question (task id, question, submitted
        answer), or ``None`` when the run stopped before any question was
        processed (no login, or the questions could not be fetched).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username

    # SPACE_ID is read from the environment; when it is unset the code link
    # below contains the literal text "None".
    space_id = os.getenv("SPACE_ID")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    agent = SmartAgent()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below expects a list of dicts; anything else would otherwise
    # fail with an unhelpful AttributeError.
    if not isinstance(questions_data, list):
        return "Error fetching questions: unexpected response format.", None

    results_log = []
    answers_payload = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # One failing question must not abort the whole run: record the
            # failure in the log and leave it out of the submission.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

# ==== GRADIO UI ====
# Build the Gradio interface: a heading and instructions, a login button, a
# run button, and two outputs (status text and a table of answers).
with gr.Blocks() as demo:
    gr.Markdown("# Smart Agent Evaluation Runner")
    gr.Markdown("""
        **Instructions:**
        1. Clone this space, define your agent logic, tools, packages, etc.
        2. Log in to Hugging Face.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
    """)
    # run_and_submit_all returns early with a login prompt when it receives
    # no profile, so the user must log in before running.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # First element of run_and_submit_all's return tuple (status message).
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Second element of the return tuple (DataFrame of results, or None).
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired here.
    # NOTE(review): this relies on Gradio supplying the logged-in user's
    # gr.OAuthProfile from the handler's type annotation -- confirm against
    # the installed Gradio version.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # Launch options are passed to Gradio as-is: debug on, share off.
    demo.launch(debug=True, share=False)