#!/usr/bin/env python3
"""
πŸš€ Enhanced GAIA Agent Interface - Full API Integration
Complete Gradio interface for the GAIA benchmark with API connectivity and scoring
"""

import gradio as gr
from gaia_agent import ModularGAIAAgent

# Instantiate the agent once at module load so its models and tools are initialised
# a single time per app process rather than on every request.
agent = ModularGAIAAgent()

def run_api_questions():
    """Run the agent on every question fetched from the GAIA API and format the results."""
    results = agent.run(from_api=True)
    blocks = []
    for r in results:
        blocks.append(
            f"Task ID: {r['task_id']}\n"
            f"Answer: {r['answer']}\n"
            f"Reasoning Trace: {' | '.join(r['reasoning_trace'])}\n"
        )
    return "\n".join(blocks)

def run_manual_question(question):
    """Answer a single user-typed question with no attached file."""
    qobj = {"task_id": "manual", "question": question, "file_name": ""}
    answer, trace = agent.answer_question(qobj)
    return answer, "\n".join(trace)

def show_help():
    """Return a Markdown summary of the agent's capabilities."""
    return (
        "# Agent Capabilities\n"
        "- Multi-modal QA (text, audio, image, code, table, YouTube/video)\n"
        "- File download and analysis from API\n"
        "- Advanced video QA: object detection, captioning, ASR\n"
        "- Secure code execution\n"
        "- Robust error handling and logging\n"
        "- GAIA-compliant output\n"
        "\nSee README.md for full details."
    )

def submit_answers(username, agent_code_url):
    # Placeholder for submission logic
    return f"Submission for {username} with code {agent_code_url} (not implemented in demo)"

def show_leaderboard():
    # Placeholder for leaderboard logic
    return "Leaderboard feature coming soon."

demo = gr.Blocks(title="GAIA Benchmark Agent", theme=gr.themes.Soft())
with demo:
    gr.Markdown("""
    # πŸ€– GAIA Benchmark Agent
    Multi-modal, multi-step reasoning agent for the Hugging Face GAIA benchmark.
    """)
    with gr.Tabs():
        with gr.TabItem("API Q&A"):
            api_btn = gr.Button("Run on API Questions", variant="primary")
            api_output = gr.Textbox(label="Answers and Reasoning Trace", lines=20)
            api_btn.click(run_api_questions, outputs=api_output)
        with gr.TabItem("Manual Input"):
            manual_q = gr.Textbox(label="Enter your question", lines=3)
            manual_btn = gr.Button("Answer", variant="primary")
            manual_a = gr.Textbox(label="Answer")
            manual_trace = gr.Textbox(label="Reasoning Trace", lines=5)
            manual_btn.click(run_manual_question, inputs=manual_q, outputs=[manual_a, manual_trace])
        with gr.TabItem("Submission/Leaderboard"):
            username = gr.Textbox(label="Hugging Face Username")
            code_url = gr.Textbox(label="Agent Code URL")
            submit_btn = gr.Button("Submit Answers", variant="primary")
            submit_out = gr.Textbox(label="Submission Result")
            submit_btn.click(submit_answers, inputs=[username, code_url], outputs=submit_out)
            leaderboard_btn = gr.Button("Show Leaderboard")
            leaderboard_out = gr.Textbox(label="Leaderboard")
            leaderboard_btn.click(show_leaderboard, outputs=leaderboard_out)
        with gr.TabItem("Agent Help"):
            help_md = gr.Markdown(show_help())

if __name__ == "__main__":
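    # On Hugging Face Spaces the default launch() settings are usually sufficient; on
    # other hosts you may need e.g. server_name="0.0.0.0" and server_port=7860.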
    demo.launch()