File size: 4,870 Bytes
10e9b7d
 
eccf8e4
3c4371f
4c42a76
808eedd
 
4c42a76
8dce943
4c42a76
3db6293
808eedd
83b4ffd
808eedd
119dab4
4c42a76
 
808eedd
 
 
 
4c42a76
3c4371f
808eedd
 
 
 
5bb8fe1
 
4c42a76
808eedd
 
 
 
 
e80aab9
808eedd
4c42a76
 
808eedd
8dce943
5bb8fe1
808eedd
4c42a76
808eedd
5bb8fe1
3c4371f
808eedd
 
 
4c42a76
 
808eedd
 
4c42a76
808eedd
4c42a76
808eedd
5bb8fe1
4c42a76
 
808eedd
 
4c42a76
5bb8fe1
4c42a76
808eedd
 
 
4c42a76
 
 
 
eccf8e4
808eedd
 
 
8dce943
4c42a76
5bb8fe1
808eedd
 
 
 
31243f4
808eedd
 
 
 
e80aab9
808eedd
 
 
 
 
 
 
 
 
5bb8fe1
4c42a76
808eedd
 
 
4c42a76
 
 
808eedd
 
 
4c42a76
 
 
 
 
5bb8fe1
808eedd
5bb8fe1
808eedd
 
 
 
 
 
 
 
7e4a06b
31243f4
808eedd
 
8dce943
4c42a76
e80aab9
 
4c42a76
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import os
import gradio as gr
import requests
import pandas as pd
from huggingface_hub import InferenceClient
from duckduckgo_search import DDGS
from datasets import load_dataset
import wikipediaapi

# Constants
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Hugging Face access token taken from the environment (None when HF_TOKEN is
# not set); it is passed straight to the inference client below.
HF_TOKEN = os.getenv("HF_TOKEN")

# Setup Hugging Face client (advanced model)
# SmartAgent calls hf_client.text_generation as its last answer source.
llm_model_id = "HuggingFaceH4/zephyr-7b-beta"
hf_client = InferenceClient(llm_model_id, token=HF_TOKEN)

# Wikipedia API setup (corrected user-agent)
# NOTE(review): the contact part of user_agent reads "[email protected]",
# which looks like an e-mail obfuscation artifact rather than a real address
# — confirm and restore the intended contact string.
wiki_api = wikipediaapi.Wikipedia(
    language='en', 
    user_agent='SmartAgent/1.0 ([email protected])'
)

# Load a subset of Wikipedia dataset (adjust as needed)
# NOTE(review): this runs at import time and requests the "train[:10000]"
# slice with trust_remote_code=True, yet nothing in the visible code reads
# wiki_dataset — confirm it is still needed before paying the startup cost.
wiki_dataset = load_dataset("wikipedia", "20220301.en", split="train[:10000]", trust_remote_code=True)

# Search functions
def duckduckgo_search(query, max_results=3):
    """Return the text snippets of the top DuckDuckGo hits for *query*.

    Args:
        query: Search string passed to ``DDGS.text``.
        max_results: Maximum number of hits to request. Defaults to 3, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The non-empty ``body`` fields of the hits joined with newlines, or
        ``"No results found."`` when no hit carries a body.
    """
    with DDGS() as ddgs:
        # Consume the hits while the session is still open; iterate them
        # directly instead of copying them into an intermediate list first.
        snippets = [
            hit["body"]
            for hit in ddgs.text(query, max_results=max_results)
            if hit.get("body")
        ]
    return "\n".join(snippets) or "No results found."

def wikipedia_search(query):
    """Look up *query* as a Wikipedia page title and return its summary.

    Returns the page summary when the page exists, otherwise the fixed
    string ``"No Wikipedia page found."``.
    """
    article = wiki_api.page(query)
    if not article.exists():
        return "No Wikipedia page found."
    return article.summary

# Comprehensive Agent
class SmartAgent:
    """Answer a question from web search, Wikipedia, or an LLM, in that order.

    Questions containing one of ``_LIVE_TERMS`` are sent to DuckDuckGo first.
    Everything else (and any question whose web search failed) is looked up
    on Wikipedia, and the hosted LLM is the final fallback.
    """

    # Substrings, matched against the lower-cased question, that mark it as
    # time-sensitive. Matching is by substring, so e.g. "live" also matches
    # "lived" or "deliver".
    _LIVE_TERMS = (
        "current",
        "latest",
        "2024",
        "2025",
        "who is the president",
        "recent",
        "live",
    )

    def __init__(self):
        """Create the agent; it holds no per-instance state."""

    def __call__(self, question: str) -> str:
        """Return an answer string for *question*.

        A failure in one source is printed and the next source is tried, so
        this method does not raise for tool or network errors. If the LLM
        itself fails, the returned string starts with ``"HF LLM error: "``.
        """
        q_lower = question.lower()

        if any(term in q_lower for term in self._LIVE_TERMS):
            try:
                return duckduckgo_search(question)
            except Exception as e:
                # The search client can raise network/rate-limit errors of
                # library-specific types; fall through instead of aborting.
                print(f"DuckDuckGo search failed: {e}")

        try:
            wiki_result = wikipedia_search(question)
        except Exception as e:
            print(f"Wikipedia lookup failed: {e}")
            wiki_result = ""

        # An existing page with an empty summary is not a usable answer, so
        # it is treated like a missing page and handed to the LLM.
        if wiki_result and "No Wikipedia page found" not in wiki_result:
            return wiki_result

        try:
            return hf_client.text_generation(question, max_new_tokens=512)
        except Exception as e:
            return f"HF LLM error: {e}"

# Submission logic
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them, and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user has not logged in.

    Returns:
        A ``(status, table)`` pair: ``status`` is a human-readable message
        and ``table`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stopped before any question was
        processed (not logged in, or the questions could not be fetched).
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    agent = SmartAgent()
    # Link to this Space's code, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []

    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # One failing question must not abort the whole evaluation: show
            # the error in the results table and leave the task unanswered.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)

    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )
    return final_status, pd.DataFrame(results_log)

# Gradio Interface
# Build the UI: a heading, usage instructions, a login button, a run button,
# and two outputs (status text and a table of answers).
with gr.Blocks() as demo:
    gr.Markdown("# Smart Agent Evaluation Runner")
    gr.Markdown("""
        **Instructions:**
        1. Clone this space, define your agent logic, tools, packages, etc.
        2. Log in to Hugging Face.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
    """)

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    # Read-only box that shows the status string returned by the handler.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Shows the DataFrame returned by the handler.
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs` are wired; the handler's `profile` parameter is presumably
    # injected by Gradio from the login session — confirm against the Gradio
    # OAuth documentation. The two return values map to the outputs in order.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)