File size: 3,817 Bytes
10e9b7d
 
eccf8e4
3c4371f
5bb8fe1
 
 
8dce943
3db6293
e80aab9
5bb8fe1
 
 
 
83b4ffd
5bb8fe1
 
 
 
 
 
 
 
3c4371f
5bb8fe1
 
 
 
e80aab9
5bb8fe1
31243f4
5bb8fe1
 
 
 
31243f4
5bb8fe1
8dce943
5bb8fe1
 
 
 
 
 
 
 
 
 
3c4371f
5bb8fe1
 
 
 
 
 
 
 
eccf8e4
5bb8fe1
8dce943
5bb8fe1
 
 
 
e80aab9
31243f4
 
 
5bb8fe1
 
 
 
e80aab9
5bb8fe1
 
e80aab9
5bb8fe1
 
 
 
 
 
e80aab9
5bb8fe1
 
 
 
 
7e4a06b
31243f4
5bb8fe1
 
8dce943
5bb8fe1
e80aab9
 
5bb8fe1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import re

import gradio as gr
import pandas as pd
import requests
from datasets import load_dataset
from duckduckgo_search import DDGS
from huggingface_hub import InferenceClient

# Base URL of the scoring service; run_and_submit_all() appends "/questions"
# and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Hugging Face token read from the HF_TOKEN environment variable.
# os.environ.get() yields None when the variable is unset, and that None is
# passed straight through to the client below.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Model identifier handed to the inference client.
deepseek_model = "deepseek-ai/DeepSeek-R1"
# Shared inference client; ask_deepseek() sends every prompt through it.
hf_client = InferenceClient(model=deepseek_model, token=HF_TOKEN)

# Wikipedia rows used by search_wikipedia(). The split expression limits the
# load to the first 10,000 rows of the "train" split. NOTE: this runs at
# import time, so importing the module triggers the dataset load.
wiki_dataset = load_dataset("wikipedia", "20220301.en", split="train[:10000]")

def search_wikipedia(question, dataset=None, max_chars=1000):
    """Return the start of the first article whose text contains *question*.

    The match is a case-insensitive substring test of the whole question
    against each row's ``"text"`` entry.

    Args:
        question: Text to look for. Surrounding whitespace is ignored.
        dataset: Iterable of mapping rows that carry a ``"text"`` entry.
            Defaults to the module-level ``wiki_dataset``.
        max_chars: Maximum number of characters of the article to return.

    Returns:
        The first ``max_chars`` characters of the first matching article, or
        the fixed message ``"No relevant information found on Wikipedia."``
        when nothing matches or the question is blank.
    """
    not_found = "No relevant information found on Wikipedia."
    needle = (question or "").strip().lower()
    if not needle:
        # An empty needle is a substring of every article, which would hand
        # back an arbitrary (first) article instead of a relevant one.
        return not_found

    rows = wiki_dataset if dataset is None else dataset
    # Stop at the first hit rather than filtering the entire dataset only to
    # read its first row afterwards.
    for row in rows:
        text = row.get("text") or ""
        if needle in text.lower():
            return text[:max_chars]
    return not_found

def duckduckgo_search(query, max_results=3):
    """Return the text snippets of the top DuckDuckGo hits for *query*.

    Args:
        query: Search string sent to DuckDuckGo.
        max_results: Maximum number of hits to request.

    Returns:
        The ``"body"`` snippets of the hits joined by newlines,
        ``"No results found."`` when there are no usable snippets, or a
        string starting with ``"DuckDuckGo Error:"`` when the search raised.
    """
    try:
        with DDGS() as ddgs:
            hits = ddgs.text(query, max_results=max_results) or []
            # Skip hits that carry no snippet instead of raising KeyError.
            snippets = [hit["body"] for hit in hits if hit.get("body")]
    except Exception as e:
        # Mirror ask_deepseek(): report the failure as text so that a single
        # failed search cannot abort a whole evaluation run.
        return f"DuckDuckGo Error: {e}"
    return "\n".join(snippets) if snippets else "No results found."

def ask_deepseek(prompt, max_tokens=512):
    """Generate a completion for *prompt* with the shared inference client.

    Args:
        prompt: Text passed to the model as-is.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        Whatever ``hf_client.text_generation`` returns, or a string of the
        form ``"DeepSeek Error: <exception>"`` if the call raised.
    """
    generation_options = {
        "max_new_tokens": max_tokens,
        "temperature": 0.2,
        "repetition_penalty": 1.1,
    }
    try:
        return hf_client.text_generation(prompt, **generation_options)
    except Exception as err:
        # Failures are reported in-band; SmartAgent looks for this prefix.
        return f"DeepSeek Error: {err}"

class SmartAgent:
    """Answer a question via web search, the DeepSeek model, or Wikipedia.

    Routing:
      1. Questions containing a time-sensitive keyword go to web search.
      2. Everything else goes to the language model.
      3. If the model reply is blank or is an error report, fall back to the
         Wikipedia lookup.

    Args:
        web_search: Optional callable ``(question) -> str`` replacing
            ``duckduckgo_search``.
        llm: Optional callable ``(question) -> str`` replacing
            ``ask_deepseek``. Failures must be reported as a string starting
            with ``"DeepSeek Error"``.
        wiki_search: Optional callable ``(question) -> str`` replacing
            ``search_wikipedia``.
    """

    # Whole-word match: a plain substring test would fire "now" on
    # "known"/"snow" and "live" on "olive"/"deliver". The "-ly" variants keep
    # "currently"/"recently" routed to web search as before.
    _TIME_SENSITIVE = re.compile(
        r"\b(?:current(?:ly)?|latest|2024|2025|recent(?:ly)?|live|today|now)\b",
        re.IGNORECASE,
    )
    # Prefix that ask_deepseek() puts on its in-band error reports.
    _LLM_ERROR_PREFIX = "DeepSeek Error"

    def __init__(self, web_search=None, llm=None, wiki_search=None):
        self._web_search = web_search
        self._llm = llm
        self._wiki_search = wiki_search

    def __call__(self, question: str) -> str:
        """Return an answer string for *question* (see class docstring)."""
        # Defaults are resolved at call time so the module-level functions
        # are only looked up when no replacement was supplied.
        if self._TIME_SENSITIVE.search(question):
            web_search = self._web_search or duckduckgo_search
            return web_search(question)

        llm = self._llm or ask_deepseek
        reply = llm(question)
        # Only a reply that *starts* with the error prefix is an error; a
        # genuine answer may mention the phrase somewhere in its text.
        if reply.strip() and not reply.startswith(self._LLM_ERROR_PREFIX):
            return reply

        wiki_search = self._wiki_search or search_wikipedia
        return wiki_search(question)

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``SmartAgent`` and submit them.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stopped before any question was
        processed (no login, agent construction failed, questions could not
        be fetched).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    agent_code = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"

    # Without a timeout, requests waits indefinitely on a stalled server.
    fetch_timeout_s = 15
    submit_timeout_s = 60

    try:
        agent = SmartAgent()
    except Exception as e:
        return f"Agent Error: {e}", None

    try:
        questions_response = requests.get(questions_url, timeout=fetch_timeout_s)
        questions_response.raise_for_status()
        questions_data = questions_response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        return f"Error fetching questions: {e}", None
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None

    results_log, answers_payload = [], []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not (task_id and question_text):
            continue
        try:
            answer = agent(question_text)
        except Exception as e:
            # One failing question must not abort the run: record it in the
            # results table but do not submit it as an answer.
            results_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
            )
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=submit_timeout_s)
        submit_response.raise_for_status()
        response = submit_response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Report the failure instead of claiming success on an error reply.
        return f"Submission Failed: {e}", results_df

    final_status = (
        f"Submission Successful!\n"
        f"User: {response.get('username')}\n"
        f"Overall Score: {response.get('score', 'N/A')}%\n"
        f"({response.get('correct_count', '?')}/{response.get('total_attempted', '?')} correct)\n"
        f"Message: {response.get('message', 'No message received.')}"
    )

    return final_status, results_df

# Gradio UI: a login button, a run button, and two outputs (status text and
# a table of the submitted answers).
with gr.Blocks() as demo:
    gr.Markdown("# Smart Agent Evaluation Runner")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only status box; receives the first value returned by the handler.
    status_output = gr.Textbox(label="Run Status", lines=5, interactive=False)
    # Receives the second value returned by the handler (a DataFrame or None).
    results_table = gr.DataFrame(label="Questions and Answers")

    # No inputs are wired explicitly. NOTE(review): the handler's `profile`
    # argument is presumably supplied by Gradio from the login state based on
    # its type annotation; confirm against the Gradio OAuth documentation.
    run_button.click(run_and_submit_all, outputs=[status_output, results_table])

# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)