Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,29 +3,462 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# (Keep Constants as is)
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def __init__(self):
|
| 15 |
-
print("
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 23 |
"""
|
| 24 |
-
Fetches all questions, runs the
|
| 25 |
and displays the results.
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 29 |
|
| 30 |
if profile:
|
| 31 |
username= f"{profile.username}"
|
|
@@ -34,24 +467,30 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 34 |
print("User not logged in.")
|
| 35 |
return "Please Login to Hugging Face with the button.", None
|
| 36 |
|
|
|
|
|
|
|
|
|
|
| 37 |
api_url = DEFAULT_API_URL
|
| 38 |
questions_url = f"{api_url}/questions"
|
| 39 |
submit_url = f"{api_url}/submit"
|
| 40 |
|
| 41 |
-
# 1. Instantiate Agent
|
| 42 |
try:
|
| 43 |
-
agent =
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
print(f"Error instantiating agent: {e}")
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# 2. Fetch Questions
|
| 52 |
print(f"Fetching questions from: {questions_url}")
|
| 53 |
try:
|
| 54 |
-
response = requests.get(questions_url, timeout=
|
| 55 |
response.raise_for_status()
|
| 56 |
questions_data = response.json()
|
| 57 |
if not questions_data:
|
|
@@ -73,33 +512,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 73 |
results_log = []
|
| 74 |
answers_payload = []
|
| 75 |
print(f"Running agent on {len(questions_data)} questions...")
|
|
|
|
|
|
|
|
|
|
| 76 |
for item in questions_data:
|
|
|
|
| 77 |
task_id = item.get("task_id")
|
| 78 |
question_text = item.get("question")
|
|
|
|
|
|
|
|
|
|
| 79 |
if not task_id or question_text is None:
|
| 80 |
print(f"Skipping item with missing task_id or question: {item}")
|
|
|
|
| 81 |
continue
|
|
|
|
|
|
|
| 82 |
try:
|
| 83 |
-
|
|
|
|
| 84 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 85 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 86 |
except Exception as e:
|
| 87 |
-
print(f"
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
if not answers_payload:
|
| 91 |
print("Agent did not produce any answers to submit.")
|
| 92 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 93 |
|
| 94 |
-
# 4. Prepare Submission
|
| 95 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 96 |
-
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 97 |
print(status_update)
|
| 98 |
|
| 99 |
# 5. Submit
|
| 100 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 101 |
try:
|
| 102 |
-
response = requests.post(submit_url, json=submission_data, timeout=
|
| 103 |
response.raise_for_status()
|
| 104 |
result_data = response.json()
|
| 105 |
final_status = (
|
|
@@ -142,19 +600,28 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 142 |
|
| 143 |
# --- Build Gradio Interface using Blocks ---
|
| 144 |
with gr.Blocks() as demo:
|
| 145 |
-
gr.Markdown("#
|
| 146 |
gr.Markdown(
|
| 147 |
"""
|
| 148 |
**Instructions:**
|
| 149 |
|
| 150 |
-
1.
|
| 151 |
-
2.
|
| 152 |
-
3.
|
|
|
|
|
|
|
| 153 |
|
| 154 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
**Disclaimers:**
|
| 156 |
-
|
| 157 |
-
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
|
| 158 |
"""
|
| 159 |
)
|
| 160 |
|
|
@@ -163,8 +630,7 @@ with gr.Blocks() as demo:
|
|
| 163 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 164 |
|
| 165 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 166 |
-
|
| 167 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 168 |
|
| 169 |
run_button.click(
|
| 170 |
fn=run_and_submit_all,
|
|
@@ -173,9 +639,8 @@ with gr.Blocks() as demo:
|
|
| 173 |
|
| 174 |
if __name__ == "__main__":
|
| 175 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 176 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 177 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 178 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 179 |
|
| 180 |
if space_host_startup:
|
| 181 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
@@ -183,14 +648,22 @@ if __name__ == "__main__":
|
|
| 183 |
else:
|
| 184 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 185 |
|
| 186 |
-
if space_id_startup:
|
| 187 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 188 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 189 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 190 |
else:
|
| 191 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 194 |
|
| 195 |
-
print("Launching Gradio Interface for
|
| 196 |
demo.launch(debug=True, share=False)
|
|
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
+
import json
|
| 7 |
+
import mimetypes
|
| 8 |
+
import glob
|
| 9 |
+
import time # Added for timing
|
| 10 |
+
|
| 11 |
+
# LangChain/LangGraph imports
|
| 12 |
+
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
|
| 13 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 14 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 15 |
+
from langchain_core.tools import tool
|
| 16 |
+
from langchain_openai import ChatOpenAI
|
| 17 |
+
from langgraph.graph import END, StateGraph
|
| 18 |
+
from langgraph.prebuilt import ToolExecutor
|
| 19 |
+
from langchain_core.utils.function_calling import format_tool_to_openai_function # Helper for tool formatting if needed
|
| 20 |
+
|
| 21 |
+
# Tool Imports
|
| 22 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 23 |
+
from langchain_community.document_loaders import TextLoader, PyPDFLoader # Import specific loaders
|
| 24 |
+
from pathlib import Path # For file path handling
|
| 25 |
|
|
|
|
| 26 |
# --- Constants ---
# Root URL of the scoring service; the questions, submit and files endpoints
# are all derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Attachment endpoint prefix; a task id is appended to it by the download tool.
FILE_DOWNLOAD_BASE_URL = DEFAULT_API_URL + "/files"
# Local working directory used as the default root by the file tools and as
# the destination for downloaded task files.
DATA_DIR = "./data"

# --- Ensure Data Directory Exists ---
# Done at import time so the directory is present before any tool runs.
os.makedirs(DATA_DIR, exist_ok=True)
print(f"Data directory '{DATA_DIR}' ensured.")
|
| 35 |
+
|
| 36 |
+
# --- Environment Variable Check ---
# Environment variables that must be set for the agent to run.
required_env_vars = ["OPENAI_API_KEY", "TAVILY_API_KEY"]
# A variable that is unset and one set to the empty string are both treated
# as missing, because os.getenv() returns a falsy value in either case.
missing_vars = [name for name in required_env_vars if not os.getenv(name)]

if missing_vars:
    # Only a warning banner is printed here; the module keeps loading.
    # In a real app, you might raise an error or disable functionality
    print(
        "\n".join(
            [
                "\n" + "=" * 50,
                "ERROR: Missing required environment variables!",
                "Please set the following environment variables:",
                *(f"- {var}" for var in missing_vars),
                "\nThey should be set in the Hugging Face Space settings under 'Variables'.",
                "You will not be able to run the agent without these.",
                "=" * 50 + "\n",
            ]
        )
    )
|
| 50 |
+
|
| 51 |
+
# --- Define Tools ---
|
| 52 |
+
|
| 53 |
+
@tool
def list_local_files(directory: str = DATA_DIR) -> str:
    """List files available locally in a directory. Defaults to the DATA_DIR."""
    # The docstring above is left verbatim: it is text the tool decorator
    # consumes, not just documentation.
    #
    # Returns a newline-separated listing, a "no files" notice, or an error
    # string; exceptions are reported in the return value, never raised.
    try:
        # '**/*' with recursive=True also walks the per-task subdirectories.
        pattern = os.path.join(directory, '**/*')
        found = list(filter(os.path.isfile, glob.glob(pattern, recursive=True)))
        if found:
            # Paths are reported relative to DATA_DIR, whatever `directory` is.
            listing = "\n".join(os.path.relpath(path, DATA_DIR) for path in found)
            return f"Files available locally in {directory} (and subdirectories):\n{listing}"
        return f"No files found in local directory: {directory} (and subdirectories)"
    except Exception as e:
        return f"Error listing local files in {directory}: {e}"
|
| 68 |
+
|
| 69 |
+
@tool
def read_local_file(filepath: str, directory: str = DATA_DIR) -> str:
    """Read the content of a specified local file within a directory. Supports .txt and .pdf. Filepath should be relative to the directory (e.g., task_id/filename.txt). Defaults to the DATA_DIR."""
    # The docstring above is left verbatim: it is text the tool decorator
    # consumes, not just documentation.
    #
    # Every outcome, including failures, is returned as a string. At most the
    # first 4000 characters of the content are included, followed by "...".
    root = Path(directory).resolve()
    target = (root / filepath).resolve()

    # Containment check on the resolved paths, so "../" segments cannot point
    # the read outside `directory`.
    if target != root and root not in target.parents:
        return f"Error: Access denied. Filepath '{filepath}' is outside the allowed directory '{directory}'."

    if not target.exists():
        return f"Error: Local file not found at {target}"
    if not target.is_file():
        return f"Error: {filepath} is not a local file."

    def _joined_pages(loader_cls):
        # Load the file with the given loader and join its documents' text.
        return "\n".join(doc.page_content for doc in loader_cls(str(target)).load())

    try:
        mime_type, _ = mimetypes.guess_type(target)
        print(f"Attempting to read file {target} with mime type {mime_type}")

        if mime_type == 'application/pdf':
            return f"Content of {filepath} (PDF):\n{_joined_pages(PyPDFLoader)[:4000]}..."
        if mime_type and mime_type.startswith('text/'):
            return f"Content of {filepath} (Text):\n{_joined_pages(TextLoader)[:4000]}..."

        # Unknown or non-text mime type: try a plain-text read and report the
        # file as unsupported if that fails.
        try:
            return f"Content of {filepath} (Attempted Text Read):\n{_joined_pages(TextLoader)[:4000]}..."
        except Exception:
            return f"Error: Unsupported local file type for {filepath}. Mime type: {mime_type}. Cannot read content as text."
    except Exception as e:
        return f"Error reading local file {filepath}: {e}"
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _find_extracted_file(extract_dir: str, filename: str) -> list:
    """Return the sorted paths under ``extract_dir`` whose trailing path components equal ``filename``."""
    wanted = os.path.normpath(filename)
    matches = []
    # os.walk is used instead of glob so hidden entries are visited and
    # characters such as "[" in the filename are not treated as a pattern.
    for root, _dirs, names in os.walk(extract_dir):
        for name in names:
            candidate = os.path.join(root, name)
            relative = os.path.relpath(candidate, extract_dir)
            if relative == wanted or relative.endswith(os.sep + wanted):
                matches.append(candidate)
    return sorted(matches)


@tool
def download_and_read_file(task_id: str, filename: str) -> str:
    """Downloads a file provided for a specific task ID from the evaluation API and then reads its content. Saves files to ./data/{task_id}/"""
    # The docstring above is left verbatim: it is text the tool decorator
    # consumes, not just documentation.
    #
    # Args:
    #     task_id: Task whose attachment is fetched from FILE_DOWNLOAD_BASE_URL.
    #     filename: Name of the file to read once the payload is on disk.
    # Returns:
    #     A success message with at most the first 4000 characters of the
    #     content, or an error description. Failures are returned, not raised.

    # Imported before the try block: the `except zipfile.BadZipFile` clause
    # must be able to resolve the name even if the download itself fails.
    import zipfile

    # task_id is supplied by the caller and becomes a directory name, so
    # reject values that would resolve outside DATA_DIR.
    data_root = Path(DATA_DIR).resolve()
    task_root = (data_root / str(task_id)).resolve()
    if data_root not in task_root.parents:
        return f"Error: Access denied. Task id '{task_id}' resolves outside the allowed directory '{DATA_DIR}'."

    archive_url = f"{FILE_DOWNLOAD_BASE_URL}/{task_id}"
    save_dir = os.path.join(DATA_DIR, str(task_id))
    os.makedirs(save_dir, exist_ok=True)  # Ensure task-specific directory exists

    print(f"Attempting to download file '{filename}' for task '{task_id}' from {archive_url}")

    try:
        print(f"Downloading archive for task {task_id} from {archive_url}")
        archive_path = os.path.join(save_dir, f"task_{task_id}_files.zip")
        # The context manager releases the streamed connection on any outcome.
        with requests.get(archive_url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            with open(archive_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f"Downloaded archive to {archive_path}")

        extract_dir = os.path.join(save_dir, "extracted")
        os.makedirs(extract_dir, exist_ok=True)

        if zipfile.is_zipfile(archive_path):
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
            print(f"Extracted archive to {extract_dir}")
            found_files = _find_extracted_file(extract_dir, filename)
        else:
            # NOTE(review): assumes a non-zip payload is the requested file
            # itself rather than an archive - confirm against the API.
            direct_name = os.path.basename(os.path.normpath(filename))
            if direct_name in ("", ".", ".."):
                raise zipfile.BadZipFile(f"no usable filename in {filename!r}")
            direct_path = os.path.join(extract_dir, direct_name)
            os.replace(archive_path, direct_path)
            print(f"Downloaded payload is not a zip archive; stored it as {direct_path}")
            found_files = [direct_path]

        if not found_files:
            # The sibling tool is run through .invoke(), the supported way to
            # call a tool object from Python code.
            return f"Error: File '{filename}' not found within the archive for task {task_id} after extraction to {extract_dir}. Files found: {list_local_files.invoke({'directory': extract_dir})}"

        if len(found_files) > 1:
            print(f"Warning: Found multiple files matching '{filename}' in archive for task {task_id}. Using the first one: {found_files[0]}")

        file_to_read_path = found_files[0]  # Use the first match

        print(f"Attempting to read extracted file: {file_to_read_path}")
        mime_type, _ = mimetypes.guess_type(file_to_read_path)
        print(f"Detected mime type: {mime_type}")

        is_pdf = mime_type == 'application/pdf'
        is_text = bool(mime_type) and mime_type.startswith('text/')
        loader = PyPDFLoader(str(file_to_read_path)) if is_pdf else TextLoader(str(file_to_read_path))
        try:
            content = "\n".join(doc.page_content for doc in loader.load())
        except Exception:
            if is_pdf or is_text:
                # A recognised type that fails to load is an unexpected error.
                raise
            # Unknown type: the plain-text attempt was only a fallback.
            return f"Error: Unsupported file type for '{filename}' ({file_to_read_path}). Mime type: {mime_type}. Cannot read content after download and extraction."

        return f"Successfully downloaded and read '{filename}' for task {task_id}. Content:\n{content[:4000]}..."  # Limit output size

    except requests.exceptions.RequestException as e:
        return f"Error downloading archive for task {task_id}: {e}"
    except zipfile.BadZipFile:
        return f"Error: Downloaded file for task {task_id} is not a valid zip archive."
    except FileNotFoundError:
        return f"Error: The file '{filename}' was expected but not found in the extracted archive for task {task_id}."
    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"An unexpected error occurred during download or reading of '{filename}' for task {task_id}: {e}"
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
# Web search tool, configured with max_results=5.
tavily_tool = TavilySearchResults(max_results=5)

# Every tool offered to the agent: web search, the two local-file tools, and
# the download-and-read tool for task attachments.
tools = [
    tavily_tool,
    list_local_files,
    read_local_file,
    download_and_read_file,
]
|
| 213 |
+
|
| 214 |
+
# --- Define the LangGraph Agent ---
|
| 215 |
+
|
| 216 |
+
# Define the state for the graph
|
| 217 |
+
# Added 'task_id' to the state
|
| 218 |
+
class AgentState:
|
| 219 |
+
messages: list
|
| 220 |
+
task_id: str | None = None # Store the current task ID
|
| 221 |
+
|
| 222 |
+
# Define nodes
|
| 223 |
+
def call_llm(state: AgentState):
    """Send the conversation to the tool-bound LLM and append its reply.

    Reads ``state['messages']`` and ``state.get('task_id')`` and returns a
    state update holding the messages plus the new response, with the task id
    passed through unchanged.
    """
    history = state['messages']
    current_task = state.get('task_id')  # None when the state carries no task id
    print(f"\n---Calling LLM (Task ID: {current_task})---")

    # The task id is only logged here; the model is invoked with the message
    # history exactly as it stands.
    reply = llm_with_tools.invoke(history)
    print(f"LLM Response type: {type(reply)}")
    print(f"LLM Response: {reply}")

    return {'messages': history + [reply], 'task_id': current_task}
|
| 240 |
+
|
| 241 |
+
def call_tool(state: AgentState):
    """Execute the tool calls requested by the most recent message.

    Reads ``state['messages']`` and ``state.get('task_id')``. Each entry of
    the last message's ``tool_calls`` is matched by name against the
    module-level ``tools`` list and invoked with the supplied arguments. The
    result, or the error text if the tool is unknown or fails, is wrapped in
    a ``ToolMessage`` carrying the id of the call it answers.

    Returns:
        A state update with the tool messages appended to ``messages`` and
        ``task_id`` passed through unchanged.
    """
    messages = state['messages']
    task_id = state.get('task_id')
    last_message = messages[-1]
    print(f"\n---Executing Tools (Task ID: {task_id})---")

    requested_calls = getattr(last_message, 'tool_calls', None)
    if not requested_calls:
        print("Last message was not a tool call message. This shouldn't happen if routing is correct.")
        # Append a message rather than return the state unchanged, so the
        # problem is visible in the conversation.
        notice = ToolMessage(
            content="Agent attempted to call tools but the last message didn't contain tool calls.",
            tool_call_id="error",
        )
        return {'messages': messages + [notice], 'task_id': task_id}

    tool_outputs = []
    for tool_call in requested_calls:
        # langchain-core exposes each tool call as a dict with "name", "args"
        # and "id" keys, so the fields are read by key, not as attributes.
        tool_name = tool_call["name"]
        tool_args = tool_call["args"]
        call_id = tool_call["id"]
        print(f"Executing tool: {tool_name} with args: {tool_args} (Call ID: {call_id})")

        tool_function = next((t for t in tools if t.name == tool_name), None)
        if tool_function is None:
            print(f"Error: Tool '{tool_name}' not found.")
            tool_outputs.append(ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=call_id))
            continue

        print(f"Attempting to call {tool_name}(**{tool_args})")
        try:
            # The arguments come straight from the LLM; a mismatch with the
            # tool signature surfaces here and is reported back to the model.
            tool_result = tool_function.invoke(tool_args)
        except Exception as e:
            print(f"Error executing tool {tool_name} with args {tool_args}: {e}")
            import traceback
            traceback.print_exc()
            tool_outputs.append(ToolMessage(content=f"Error executing {tool_name}: {e}", tool_call_id=call_id))
            continue

        # Results are not always strings, so convert before slicing the preview.
        result_text = str(tool_result)
        tool_outputs.append(ToolMessage(content=result_text, tool_call_id=call_id))
        print(f"Tool '{tool_name}' output: {result_text[:200]}...")

    return {'messages': messages + tool_outputs, 'task_id': task_id}
|
| 308 |
+
|
| 309 |
+
# Define conditional edge logic
|
| 310 |
+
def route_tools(state: AgentState):
    """Choose the next graph node from the newest message.

    Returns ``'tool_execution'`` when the last message carries a non-empty
    ``tool_calls`` attribute, otherwise ``END``.
    """
    newest = state['messages'][-1]
    print(f"\n---Routing (Task ID: {state.get('task_id')})---")
    print(f"Last message type: {type(newest)}")
    print(f"Last message content: {newest.content}")

    # Falsy both when the attribute is missing and when the list is empty.
    pending_calls = hasattr(newest, 'tool_calls') and newest.tool_calls
    print(f"Last message tool_calls: {pending_calls}")

    if not pending_calls:
        # No tool requested: the message is treated as the final answer.
        print("Routing to end")
        return END

    print("Routing to tool_execution")
    return 'tool_execution'
|
| 325 |
+
|
| 326 |
+
# System prompt that guides the LLM. The parts are joined with newlines:
# writing them as adjacent string literals would concatenate them with no
# separator at all, running the numbered steps into one another.
system_prompt = "\n".join(
    (
        "You are a helpful AI assistant designed to answer complex real-world questions based on provided tools.",
        "You have access to web search and the ability to list and read local files, including downloading files provided with a task.",
        "Follow these steps:",
        "1. Carefully analyze the user's question.",
        "2. Break down the question into smaller steps required to find the answer.",
        "3. Use the available tools (`tavily_search`, `list_local_files`, `read_local_file`, `download_and_read_file`) to gather necessary information.",
        "   - If the question mentions files and you have the task_id available in your state, use the `download_and_read_file` tool with the correct `task_id` and `filename` to get file contents.",
        "   - If files are already downloaded or you need to see what's available locally, use `list_local_files`.",
        "   - Use `read_local_file` to read files already on disk (e.g., after downloading or if pre-loaded).",
        "   - Use `tavily_search` for general knowledge or looking up information online.",
        "4. Synthesize the information gathered from the tools.",
        "5. Formulate the final answer based *only* on the information you have retrieved or reasoned about.",
        "6. Ensure the answer is in the exact format requested by the user (e.g., comma-separated list, specific ordering).",
        "7. If you have sufficient information and the answer is ready, provide the final answer. Otherwise, use tools or ask for clarification if necessary (though for this benchmark, you must attempt to answer).",
        "Do NOT make up information.",
    )
)

# Initialize the LLM and bind tools. A failure here (for example a missing
# API key) is reported and leaves both names set to None instead of aborting
# the import, so the rest of the module can still load.
try:
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # gpt-4o-mini is cheaper and often sufficient
    llm_with_tools = llm.bind_tools(tools)
    print("LLM and tools initialized.")
except Exception as e:
    print(f"Error initializing LLM or binding tools: {e}")
    llm = None
    llm_with_tools = None
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# Executor over the same tool list, kept available as a module-level name.
tool_executor = ToolExecutor(tools)
print("Tool executor initialized.")

# Build the graph: an "llm" node that decides what to do next and a
# "tool_execution" node that runs whatever tools the LLM asked for.
workflow = StateGraph(AgentState)

# Add nodes
workflow.add_node("llm", call_llm)
workflow.add_node("tool_execution", call_tool)

# Every run starts by asking the LLM.
workflow.set_entry_point("llm")

# After executing tools, always go back to the LLM to decide the next step.
workflow.add_edge("tool_execution", "llm")

# From the LLM, route_tools picks either the tool node or the end of the run.
# The StateGraph method is `add_conditional_edges` (plural).
workflow.add_conditional_edges(
    "llm",
    route_tools,
    {"tool_execution": "tool_execution", END: END},
)

# Compile the graph. On failure `app` is set to None instead of aborting the
# import.
try:
    app = workflow.compile()
    print("LangGraph compiled successfully.")
except Exception as e:
    print(f"Error compiling LangGraph workflow: {e}")
    import traceback
    traceback.print_exc()
    app = None
|
| 390 |
+
|
| 391 |
+
# --- Agent Definition using LangGraph ---
|
| 392 |
+
class GaiaAgent:
    """Callable wrapper around the compiled LangGraph workflow.

    An instance is called as ``agent(task_id, question)`` and returns the
    answer text, or a description of what went wrong. Failures during a run
    are returned as strings rather than raised.
    """

    def __init__(self):
        """Bind the module-level compiled graph and system prompt.

        Raises:
            RuntimeError: If the graph or the tool-bound LLM is ``None``.
        """
        print("GaiaAgent initialized.")
        if app is None or llm_with_tools is None:
            raise RuntimeError("Agent failed to initialize due to missing API keys or graph compilation error.")
        self.agent_app = app  # Store the compiled graph
        self.system_prompt = system_prompt  # Store the system prompt

    def __call__(self, task_id: str, question: str) -> str:
        """Run the graph on one question and return the final answer.

        Args:
            task_id: Identifier of the task; placed in the graph state and
                stated in the system message so the model can pass it to
                tools that need it.
            question: The question text sent as the human message.

        Returns:
            The content of the last message when it is an ``AIMessage``;
            otherwise a diagnostic string describing the unexpected final
            state or the exception that occurred.
        """
        print(f"\nAgent received question for Task ID {task_id}: {question}")

        if self.agent_app is None:
            print("Agent app is not compiled. Cannot process question.")
            return "Agent failed to initialize."

        try:
            # Imported here so the block brings its own dependency into scope.
            from langchain_core.messages import SystemMessage

            # Sent with the system role. The task id is stated explicitly
            # because the model only sees the messages, not the graph state,
            # and the download tool needs that id as an argument.
            guidance = f"{self.system_prompt}\nThe task_id of the current question is '{task_id}'."
            initial_messages = [
                SystemMessage(content=guidance),
                HumanMessage(content=question),
            ]
            print("Invoking LangGraph agent...")
            final_state = self.agent_app.invoke(
                {"messages": initial_messages, "task_id": task_id},
            )
            print("LangGraph agent finished.")

            final_message = final_state['messages'][-1]
            if isinstance(final_message, AIMessage):
                final_answer = final_message.content
                print(f"Agent returning answer: {final_answer}")
                return final_answer

            # The run ended on something other than an AI reply: report it
            # and return whatever content can be recovered.
            error_message = f"Agent finished in an unexpected state. Last message type: {type(final_message)}. Content: {final_message}"
            print(error_message)
            if hasattr(final_message, 'content'):
                content_attempt = str(final_message.content)
            elif hasattr(final_message, 'tool_outputs'):
                content_attempt = "Tool Outputs: " + json.dumps(final_message.tool_outputs)
            else:
                content_attempt = str(final_message)

            return f"Agent finished without a final answer message. Last output: {content_attempt[:500]}"

        except Exception as e:
            print(f"Error during agent execution: {e}")
            import traceback
            traceback.print_exc()
            return f"An error occurred during agent execution: {e}"
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
# --- Rest of the original code (kept mostly as is) ---
|
| 455 |
|
| 456 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 457 |
"""
|
| 458 |
+
Fetches all questions, runs the GaiaAgent on them, submits all answers,
|
| 459 |
and displays the results.
|
| 460 |
"""
|
| 461 |
+
space_id = os.getenv("SPACE_ID")
|
|
|
|
| 462 |
|
| 463 |
if profile:
|
| 464 |
username= f"{profile.username}"
|
|
|
|
| 467 |
print("User not logged in.")
|
| 468 |
return "Please Login to Hugging Face with the button.", None
|
| 469 |
|
| 470 |
+
if missing_vars:
|
| 471 |
+
return f"Missing required environment variables: {', '.join(missing_vars)}. Please set them in your Space settings.", None
|
| 472 |
+
|
| 473 |
api_url = DEFAULT_API_URL
|
| 474 |
questions_url = f"{api_url}/questions"
|
| 475 |
submit_url = f"{api_url}/submit"
|
| 476 |
|
| 477 |
+
# 1. Instantiate Agent
|
| 478 |
try:
|
| 479 |
+
agent = GaiaAgent()
|
| 480 |
+
print("GaiaAgent instantiated successfully.")
|
| 481 |
except Exception as e:
|
| 482 |
print(f"Error instantiating agent: {e}")
|
| 483 |
+
import traceback
|
| 484 |
+
traceback.print_exc()
|
| 485 |
+
return f"Error initializing agent: {e}\nCheck your environment variables.", None
|
| 486 |
+
|
| 487 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local development environment - No Space ID"
|
| 488 |
+
print(f"Agent code link: {agent_code}")
|
| 489 |
|
| 490 |
# 2. Fetch Questions
|
| 491 |
print(f"Fetching questions from: {questions_url}")
|
| 492 |
try:
|
| 493 |
+
response = requests.get(questions_url, timeout=30)
|
| 494 |
response.raise_for_status()
|
| 495 |
questions_data = response.json()
|
| 496 |
if not questions_data:
|
|
|
|
| 512 |
results_log = []
|
| 513 |
answers_payload = []
|
| 514 |
print(f"Running agent on {len(questions_data)} questions...")
|
| 515 |
+
start_time = time.time()
|
| 516 |
+
question_counter = 0
|
| 517 |
+
|
| 518 |
for item in questions_data:
|
| 519 |
+
question_counter += 1
|
| 520 |
task_id = item.get("task_id")
|
| 521 |
question_text = item.get("question")
|
| 522 |
+
print(f"\n--- Processing Task ID: {task_id} ({question_counter}/{len(questions_data)}) ---")
|
| 523 |
+
# question_text comes from item.get("question") and may be None at this point;
# the missing-data guard only runs further down, so fall back to an empty
# preview here instead of crashing the whole run on None[:150].
print(f"Question: {(question_text or '')[:150]}...")
|
| 524 |
+
|
| 525 |
if not task_id or question_text is None:
|
| 526 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 527 |
+
results_log.append({"Task ID": "N/A", "Question": str(item), "Submitted Answer": "Skipped (missing data)"})
|
| 528 |
continue
|
| 529 |
+
|
| 530 |
+
submitted_answer = "Agent failed to run." # Default in case of crash
|
| 531 |
try:
|
| 532 |
+
# Pass task_id to the agent's call method
|
| 533 |
+
submitted_answer = agent(task_id, question_text)
|
| 534 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 535 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 536 |
except Exception as e:
|
| 537 |
+
print(f"FATAL ERROR running agent on task {task_id}: {e}")
|
| 538 |
+
import traceback
|
| 539 |
+
traceback.print_exc()
|
| 540 |
+
error_answer = f"AGENT CRASHED on task {task_id}: {e}"
|
| 541 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": error_answer}) # Submit error answer
|
| 542 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
|
| 543 |
+
|
| 544 |
+
end_time = time.time()
|
| 545 |
+
total_duration = end_time - start_time
|
| 546 |
+
print(f"\nFinished running agent on {len(questions_data)} questions in {total_duration:.2f} seconds.")
|
| 547 |
|
| 548 |
if not answers_payload:
|
| 549 |
print("Agent did not produce any answers to submit.")
|
| 550 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 551 |
|
| 552 |
+
# 4. Prepare Submission
|
| 553 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 554 |
+
status_update = f"Agent finished running. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 555 |
print(status_update)
|
| 556 |
|
| 557 |
# 5. Submit
|
| 558 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 559 |
try:
|
| 560 |
+
response = requests.post(submit_url, json=submission_data, timeout=180) # Increased timeout further
|
| 561 |
response.raise_for_status()
|
| 562 |
result_data = response.json()
|
| 563 |
final_status = (
|
|
|
|
| 600 |
|
| 601 |
# --- Build Gradio Interface using Blocks ---
|
| 602 |
with gr.Blocks() as demo:
|
| 603 |
+
gr.Markdown("# GAIA Level 1 Agent Evaluation Runner")
|
| 604 |
gr.Markdown(
|
| 605 |
"""
|
| 606 |
**Instructions:**
|
| 607 |
|
| 608 |
+
1. **Clone this space.**
|
| 609 |
+
2. **Set Environment Variables:** Go to the Space settings and add your `OPENAI_API_KEY` and `TAVILY_API_KEY` as secret variables. These are required for the agent to use the LLM and search tools.
|
| 610 |
+
3. Modify the `GaiaAgent` code (above) to improve its reasoning, tool use, and prompt engineering.
|
| 611 |
+
4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
| 612 |
+
5. Click 'Run Evaluation & Submit All Answers' to fetch questions (with task IDs), run your agent (which can now download files via the API using the task ID), submit answers, and see the score.
|
| 613 |
|
| 614 |
---
|
| 615 |
+
**Agent Details:**
|
| 616 |
+
This agent uses `langgraph` and `langchain`. It has access to:
|
| 617 |
+
* Web search via Tavily (`tavily_search`).
|
| 618 |
+
* Listing local files (`list_local_files`).
|
| 619 |
+
* Reading local files (`read_local_file`).
|
| 620 |
+
* **Downloading and reading files provided with a task via the evaluation API (`download_and_read_file`), using the task ID.**
|
| 621 |
+
* A capable LLM (OpenAI's `gpt-4o-mini` by default, requires `OPENAI_API_KEY`).
|
| 622 |
+
|
| 623 |
**Disclaimers:**
|
| 624 |
+
Running the evaluation can take several minutes as the agent processes each question. The provided agent is a starting point; significant prompt engineering, tool refinement, and potentially more sophisticated graph logic will be needed to reliably score above 30%.
|
|
|
|
| 625 |
"""
|
| 626 |
)
|
| 627 |
|
|
|
|
| 630 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 631 |
|
| 632 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 633 |
+
# NOTE: `row_selectable` is not a documented gr.DataFrame constructor argument;
# Gradio versions that reject unknown keyword arguments raise TypeError on it
# while the UI is being built, so only supported options are passed here.
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
|
|
|
| 634 |
|
| 635 |
run_button.click(
|
| 636 |
fn=run_and_submit_all,
|
|
|
|
| 639 |
|
| 640 |
if __name__ == "__main__":
|
| 641 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
|
| 642 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 643 |
+
space_id_startup = os.getenv("SPACE_ID")
|
| 644 |
|
| 645 |
if space_host_startup:
|
| 646 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
|
| 648 |
else:
|
| 649 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 650 |
|
| 651 |
+
if space_id_startup:
|
| 652 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 653 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 654 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 655 |
else:
|
| 656 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 657 |
|
| 658 |
+
if missing_vars:
|
| 659 |
+
print("\n" + "="*50)
|
| 660 |
+
print("WARNING: Launching interface but required API keys are missing.")
|
| 661 |
+
print("Set OPENAI_API_KEY and TAVILY_API_KEY in environment variables.")
|
| 662 |
+
print("The agent WILL FAIL without these.")
|
| 663 |
+
print("="*50 + "\n")
|
| 664 |
+
|
| 665 |
+
|
| 666 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 667 |
|
| 668 |
+
print("Launching Gradio Interface for GAIA Level 1 Agent Evaluation...")
|
| 669 |
demo.launch(debug=True, share=False)
|