AdityaPandey committed
Commit 2a61e41 · verified · 1 Parent(s): 81917a3

Update app.py

Files changed (1):
  1. app.py +511 -38

app.py CHANGED
@@ -3,29 +3,462 @@ import gradio as gr
  import requests
  import inspect
  import pandas as pd
 
- # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
      def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
 
  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
      and displays the results.
      """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
      if profile:
          username= f"{profile.username}"
@@ -34,24 +467,30 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
          print("User not logged in.")
          return "Please Login to Hugging Face with the button.", None
 
      api_url = DEFAULT_API_URL
      questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"
 
-     # 1. Instantiate Agent ( modify this part to create your agent)
      try:
-         agent = BasicAgent()
      except Exception as e:
          print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
 
      # 2. Fetch Questions
      print(f"Fetching questions from: {questions_url}")
      try:
-         response = requests.get(questions_url, timeout=15)
          response.raise_for_status()
          questions_data = response.json()
          if not questions_data:
@@ -73,33 +512,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      results_log = []
      answers_payload = []
      print(f"Running agent on {len(questions_data)} questions...")
      for item in questions_data:
          task_id = item.get("task_id")
          question_text = item.get("question")
          if not task_id or question_text is None:
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
      if not answers_payload:
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-     # 4. Prepare Submission
      submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)
 
      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
          response.raise_for_status()
          result_data = response.json()
          final_status = (
@@ -142,19 +600,28 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
  # --- Build Gradio Interface using Blocks ---
  with gr.Blocks() as demo:
-     gr.Markdown("# Basic Agent Evaluation Runner")
      gr.Markdown(
          """
          **Instructions:**
 
-         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
          ---
          **Disclaimers:**
-         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
          """
      )
 
@@ -163,8 +630,7 @@ with gr.Blocks() as demo:
      run_button = gr.Button("Run Evaluation & Submit All Answers")
 
      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     # Removed max_rows=10 from DataFrame constructor
-     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
      run_button.click(
          fn=run_and_submit_all,
@@ -173,9 +639,8 @@ with gr.Blocks() as demo:
 
  if __name__ == "__main__":
      print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
      space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
      if space_host_startup:
          print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +648,22 @@ if __name__ == "__main__":
      else:
          print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-     if space_id_startup: # Print repo URLs if SPACE_ID is found
          print(f"✅ SPACE_ID found: {space_id_startup}")
          print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
          print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
      else:
          print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
      print("-"*(60 + len(" App Starting ")) + "\n")
 
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
      demo.launch(debug=True, share=False)
 
  import requests
  import inspect
  import pandas as pd
+ import json
+ import mimetypes
+ import glob
+ import time  # Added for timing
+ import zipfile  # Used by download_and_read_file for task archives
+
+ # LangChain/LangGraph imports
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.tools import tool
+ from langchain_openai import ChatOpenAI
+ from langgraph.graph import END, StateGraph
+ from langgraph.prebuilt import ToolExecutor
+ from typing import TypedDict  # For the graph state definition
+
+ # Tool Imports
+ from langchain_community.tools.tavily_search import TavilySearchResults
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader  # Import specific loaders
+ from pathlib import Path  # For file path handling
 
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+ FILE_DOWNLOAD_BASE_URL = f"{DEFAULT_API_URL}/files"
+ # Define a directory where files might be downloaded or located
+ DATA_DIR = "./data"  # Use this as a base for downloaded files too
+
+ # --- Ensure Data Directory Exists ---
+ os.makedirs(DATA_DIR, exist_ok=True)
+ print(f"Data directory '{DATA_DIR}' ensured.")
+
+ # --- Environment Variable Check ---
+ required_env_vars = ["OPENAI_API_KEY", "TAVILY_API_KEY"]
+ missing_vars = [var for var in required_env_vars if not os.getenv(var)]
+
+ if missing_vars:
+     print("\n" + "="*50)
+     print("ERROR: Missing required environment variables!")
+     print("Please set the following environment variables:")
+     for var in missing_vars:
+         print(f"- {var}")
+     print("\nThey should be set in the Hugging Face Space settings under 'Variables'.")
+     print("You will not be able to run the agent without these.")
+     print("="*50 + "\n")
+     # In a real app, you might raise an error or disable functionality
+
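The check above only warns and lets the app continue to launch; a fail-fast variant (a minimal sketch, not part of this commit) would stop the Space at startup instead:

    # Sketch: abort startup instead of only printing a warning
    if missing_vars:
        raise RuntimeError(f"Missing required environment variables: {', '.join(missing_vars)}")
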
+ # --- Define Tools ---
+
+ @tool
+ def list_local_files(directory: str = DATA_DIR) -> str:
+     """List files available locally in a directory. Defaults to the DATA_DIR."""
+     try:
+         # List all files and directories, but focus on files.
+         # Use ** to search recursively within task_id subdirectories as well.
+         entries = glob.glob(os.path.join(directory, '**/*'), recursive=True)
+         files = [entry for entry in entries if os.path.isfile(entry)]
+         if not files:
+             return f"No files found in local directory: {directory} (and subdirectories)"
+         # Return paths relative to DATA_DIR for clarity
+         file_list = "\n".join([os.path.relpath(f, DATA_DIR) for f in files])
+         return f"Files available locally in {directory} (and subdirectories):\n{file_list}"
+     except Exception as e:
+         return f"Error listing local files in {directory}: {e}"
+
+ @tool
+ def read_local_file(filepath: str, directory: str = DATA_DIR) -> str:
+     """Read the content of a specified local file within a directory. Supports .txt and .pdf. Filepath should be relative to the directory (e.g., task_id/filename.txt). Defaults to the DATA_DIR."""
+     # Sanitize filepath to prevent directory traversal
+     base_path = Path(directory).resolve()
+     full_path = (base_path / filepath).resolve()
+
+     # Check that the resolved path is actually within the intended base directory
+     try:
+         full_path.relative_to(base_path)
+     except ValueError:
+         return f"Error: Access denied. Filepath '{filepath}' is outside the allowed directory '{directory}'."
+
+     if not full_path.exists():
+         return f"Error: Local file not found at {full_path}"
+     if not full_path.is_file():
+         return f"Error: {filepath} is not a local file."
+
+     try:
+         mime_type, _ = mimetypes.guess_type(full_path)
+         print(f"Attempting to read file {full_path} with mime type {mime_type}")
+
+         if mime_type == 'application/pdf':
+             loader = PyPDFLoader(str(full_path))
+             docs = loader.load()
+             content = "\n".join([doc.page_content for doc in docs])
+             return f"Content of {filepath} (PDF):\n{content[:4000]}..."  # Limit output size
+         elif mime_type and mime_type.startswith('text/'):
+             loader = TextLoader(str(full_path))
+             docs = loader.load()
+             content = "\n".join([doc.page_content for doc in docs])
+             return f"Content of {filepath} (Text):\n{content[:4000]}..."  # Limit output size
+         else:
+             # Fallback for other types - try reading as text
+             try:
+                 loader = TextLoader(str(full_path))
+                 docs = loader.load()
+                 content = "\n".join([doc.page_content for doc in docs])
+                 return f"Content of {filepath} (Attempted Text Read):\n{content[:4000]}..."
+             except Exception:
+                 return f"Error: Unsupported local file type for {filepath}. Mime type: {mime_type}. Cannot read content as text."
+
+     except Exception as e:
+         return f"Error reading local file {filepath}: {e}"
+
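The resolve()/relative_to() pair above is the entire traversal guard; a standalone illustration (hypothetical paths, not part of the commit) behaves like this:

    from pathlib import Path

    base = Path("./data").resolve()
    for candidate in ["task123/notes.txt", "../outside.txt"]:  # hypothetical inputs
        resolved = (base / candidate).resolve()
        try:
            resolved.relative_to(base)  # raises ValueError if it escapes base
            print(candidate, "-> allowed")
        except ValueError:
            print(candidate, "-> denied")
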
+ @tool
+ def download_and_read_file(task_id: str, filename: str) -> str:
+     """Downloads the file archive provided for a specific task ID from the evaluation API and reads the requested file's content. Saves files to ./data/{task_id}/"""
+     save_dir = os.path.join(DATA_DIR, str(task_id))
+     os.makedirs(save_dir, exist_ok=True)  # Ensure task-specific directory exists
+
+     try:
+         # Working assumption (a common GAIA pattern): GET /files/{task_id}
+         # returns a ZIP archive containing *all* files for that task.
+         # Download the archive, extract it, then read the specific
+         # requested filename from the extracted contents.
+         archive_url = f"{FILE_DOWNLOAD_BASE_URL}/{task_id}"
+         print(f"Downloading archive for task {task_id} from {archive_url}")
+         response = requests.get(archive_url, stream=True, timeout=30)  # Increased timeout
+         response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
+
+         archive_path = os.path.join(save_dir, f"task_{task_id}_files.zip")
+         with open(archive_path, 'wb') as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+         print(f"Downloaded archive to {archive_path}")
+
+         # Extract the archive
+         extract_dir = os.path.join(save_dir, "extracted")
+         os.makedirs(extract_dir, exist_ok=True)
+         with zipfile.ZipFile(archive_path, 'r') as zip_ref:
+             zip_ref.extractall(extract_dir)
+         print(f"Extracted archive to {extract_dir}")
+
+         # Find the requested filename within the extracted directory,
+         # allowing for path variations inside the zip.
+         # (Note: include_hidden requires Python 3.11+.)
+         search_pattern = os.path.join(extract_dir, '**', filename)
+         found_files = glob.glob(search_pattern, recursive=True, include_hidden=True)
+
+         if not found_files:
+             return f"Error: File '{filename}' not found within the archive for task {task_id} after extraction to {extract_dir}. Files found: {list_local_files.invoke(extract_dir)}"
+
+         if len(found_files) > 1:
+             print(f"Warning: Found multiple files matching '{filename}' in archive for task {task_id}. Using the first one: {found_files[0]}")
+
+         file_to_read_path = found_files[0]  # Use the first match
+
+         # Read the extracted file (same logic as read_local_file)
+         print(f"Attempting to read extracted file: {file_to_read_path}")
+         mime_type, _ = mimetypes.guess_type(file_to_read_path)
+         print(f"Detected mime type: {mime_type}")
+
+         content = ""
+         if mime_type == 'application/pdf':
+             loader = PyPDFLoader(str(file_to_read_path))
+             docs = loader.load()
+             content = "\n".join([doc.page_content for doc in docs])
+         elif mime_type and mime_type.startswith('text/'):
+             loader = TextLoader(str(file_to_read_path))
+             docs = loader.load()
+             content = "\n".join([doc.page_content for doc in docs])
+         else:
+             # Fallback for other types - try reading as text
+             try:
+                 loader = TextLoader(str(file_to_read_path))
+                 docs = loader.load()
+                 content = "\n".join([doc.page_content for doc in docs])
+             except Exception:
+                 return f"Error: Unsupported file type for '{filename}' ({file_to_read_path}). Mime type: {mime_type}. Cannot read content after download and extraction."
+
+         return f"Successfully downloaded and read '{filename}' for task {task_id}. Content:\n{content[:4000]}..."  # Limit output size
+
+     except requests.exceptions.RequestException as e:
+         return f"Error downloading archive for task {task_id}: {e}"
+     except zipfile.BadZipFile:
+         return f"Error: Downloaded file for task {task_id} is not a valid zip archive."
+     except FileNotFoundError:
+         return f"Error: The file '{filename}' was expected but not found in the extracted archive for task {task_id}."
+     except Exception as e:
+         import traceback
+         traceback.print_exc()
+         return f"An unexpected error occurred during download or reading of '{filename}' for task {task_id}: {e}"
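The tool assumes GET /files/{task_id} serves a ZIP; the extract-then-glob lookup it performs can be exercised locally with a throwaway archive (illustrative names only, not part of the commit):

    import glob, os, zipfile

    os.makedirs("data/demo-task", exist_ok=True)  # "demo-task" is a made-up task_id
    archive = "data/demo-task/files.zip"
    with zipfile.ZipFile(archive, "w") as zf:
        zf.writestr("nested/answer.txt", "42")
    with zipfile.ZipFile(archive) as zf:
        zf.extractall("data/demo-task/extracted")
    print(glob.glob("data/demo-task/extracted/**/answer.txt", recursive=True))
    # ['data/demo-task/extracted/nested/answer.txt']
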
+
+ # Initialize Tavily Search Tool
+ tavily_tool = TavilySearchResults(max_results=5)
+
+ # List of tools available to the agent, including the new download_and_read_file tool
+ tools = [tavily_tool, list_local_files, read_local_file, download_and_read_file]
+
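Each entry in tools is a LangChain BaseTool, so it can be smoke-tested directly with .invoke() before wiring up the graph (a quick check, not part of the commit; the file path is hypothetical):

    print(list_local_files.name)  # "list_local_files"
    print(list_local_files.invoke({"directory": "./data"}))
    print(read_local_file.invoke({"filepath": "demo-task/notes.txt"}))  # hypothetical file
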
+ # --- Define the LangGraph Agent ---
+
+ # Define the state for the graph as a TypedDict so LangGraph can construct and
+ # pass it with dict-style access; 'task_id' is added to carry the current task ID.
+ class AgentState(TypedDict):
+     messages: list
+     task_id: str | None  # Store the current task ID
+
+ # Define nodes
+ def call_llm(state: AgentState):
+     """Invokes the LLM to make a decision or generate a response."""
+     messages = state['messages']
+     task_id = state.get('task_id')  # Get task_id from state if available
+     print(f"\n---Calling LLM (Task ID: {task_id})---")
+     # The task_id is not injected into the prompt here; the LLM is expected to
+     # pass it explicitly when calling the download tool, guided by the system
+     # prompt that is prepended to the messages when the graph is invoked.
+     response = llm_with_tools.invoke(messages)
+     print(f"LLM Response type: {type(response)}")
+     print(f"LLM Response: {response}")
+     return {'messages': messages + [response], 'task_id': task_id}
+
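call_tool below reads message.tool_calls, which in recent langchain_core is a list of dicts with name, args, and id keys; an illustrative message (made-up values, not part of the commit) looks like this:

    from langchain_core.messages import AIMessage

    msg = AIMessage(content="", tool_calls=[{
        "name": "download_and_read_file",
        "args": {"task_id": "abc123", "filename": "menu.pdf"},  # hypothetical values
        "id": "call_0",
    }])
    print(msg.tool_calls[0]["name"], msg.tool_calls[0]["args"])
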
+ def call_tool(state: AgentState):
+     """Executes the tools specified by the LLM."""
+     messages = state['messages']
+     task_id = state.get('task_id')  # Get task_id from state
+     last_message = messages[-1]
+     print(f"\n---Executing Tools (Task ID: {task_id})---")
+
+     tool_outputs = []
+     # Ensure the last message actually contains tool calls
+     if not hasattr(last_message, 'tool_calls') or not last_message.tool_calls:
+         print("Last message was not a tool call message. This shouldn't happen if routing is correct.")
+         tool_outputs.append(ToolMessage(content="Agent attempted to call tools but the last message didn't contain tool calls.", tool_call_id="error"))
+         return {'messages': messages + tool_outputs, 'task_id': task_id}
+
+     # Invoke each requested tool manually and wrap the result in a ToolMessage
+     # linked back to the corresponding tool call ID. (ToolExecutor could do
+     # this, but manual invocation keeps the error handling explicit.)
+     for tool_call in last_message.tool_calls:
+         tool_name = tool_call['name']
+         tool_args = tool_call['args']
+         call_id = tool_call['id']
+         print(f"Executing tool: {tool_name} with args: {tool_args} (Call ID: {call_id})")
+
+         # Find the tool function by name
+         tool_function = next((t for t in tools if t.name == tool_name), None)
+         if tool_function is None:
+             print(f"Error: Tool '{tool_name}' not found.")
+             tool_outputs.append(ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=call_id))
+             continue
+
+         try:
+             # tool_args is the dict parsed from the LLM's function call; for
+             # download_and_read_file the LLM must include task_id and filename.
+             tool_result = tool_function.invoke(tool_args)  # Use invoke for LangChain tools
+             tool_outputs.append(ToolMessage(content=str(tool_result), tool_call_id=call_id))
+             print(f"Tool '{tool_name}' output: {str(tool_result)[:200]}...")
+         except Exception as e:
+             print(f"Error executing tool {tool_name} with args {tool_args}: {e}")
+             import traceback
+             traceback.print_exc()
+             tool_outputs.append(ToolMessage(content=f"Error executing {tool_name}: {e}", tool_call_id=call_id))
+
+     # Append tool outputs to the messages; each is linked to its tool call ID
+     return {'messages': messages + tool_outputs, 'task_id': task_id}
+
+ # Define conditional edge logic
+ def route_tools(state: AgentState):
+     """Routes the agent based on whether the LLM decided to use a tool."""
+     last_message = state['messages'][-1]
+     print(f"\n---Routing (Task ID: {state.get('task_id')})---")
+     print(f"Last message type: {type(last_message)}")
+     print(f"Last message content: {last_message.content}")
+     print(f"Last message tool_calls: {hasattr(last_message, 'tool_calls') and last_message.tool_calls}")
+
+     # If the last message has tool calls, route to the tool execution node
+     if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
+         print("Routing to tool_execution")
+         return 'tool_execution'
+     # Otherwise, route to the end node (assuming it's a final answer)
+     print("Routing to end")
+     return END
+
+ # Initialize the LLM and bind tools
+ try:
+     # Add a system prompt to guide the LLM
+     system_prompt = (
+         "You are a helpful AI assistant designed to answer complex real-world questions using the provided tools.\n"
+         "You have access to web search and the ability to list and read local files, including downloading files provided with a task.\n"
+         "Follow these steps:\n"
+         "1. Carefully analyze the user's question.\n"
+         "2. Break down the question into smaller steps required to find the answer.\n"
+         "3. Use the available tools (`tavily_search`, `list_local_files`, `read_local_file`, `download_and_read_file`) to gather necessary information.\n"
+         "   - If the question mentions files provided with the task, use the `download_and_read_file` tool with the correct `task_id` and `filename` to get file contents.\n"
+         "   - If files are already downloaded or you need to see what's available locally, use `list_local_files`.\n"
+         "   - Use `read_local_file` to read files already on disk (e.g., after downloading or if pre-loaded).\n"
+         "   - Use `tavily_search` for general knowledge or looking up information online.\n"
+         "4. Synthesize the information gathered from the tools.\n"
+         "5. Formulate the final answer based *only* on the information you have retrieved or reasoned about.\n"
+         "6. Ensure the answer is in the exact format requested by the user (e.g., comma-separated list, specific ordering).\n"
+         "7. If you have sufficient information and the answer is ready, provide the final answer. Otherwise, keep using tools (for this benchmark, you must always attempt to answer).\n"
+         "Do NOT make up information."
+     )
+
+     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # gpt-4o-mini is cheaper and often sufficient
+     llm_with_tools = llm.bind_tools(tools)
+     print("LLM and tools initialized.")
+ except Exception as e:
+     print(f"Error initializing LLM or binding tools: {e}")
+     llm = None
+     llm_with_tools = None
+
+ # Initialize the ToolExecutor (though we are manually invoking tools in the
+ # call_tool node). Kept mainly for reference or potential future use.
+ tool_executor = ToolExecutor(tools)
+ print("Tool executor initialized.")
+
+ # Build the graph
+ workflow = StateGraph(AgentState)
+
+ # Add nodes
+ workflow.add_node("llm", call_llm)
+ workflow.add_node("tool_execution", call_tool)
+
+ # Set the entry point
+ workflow.set_entry_point("llm")
+
+ # Add edges
+ workflow.add_edge("tool_execution", "llm")  # After executing tools, always return to the LLM to decide the next step
+
+ # Add conditional edges from the LLM node (the LangGraph API is add_conditional_edges, plural)
+ workflow.add_conditional_edges(
+     "llm",
+     route_tools,
+     {"tool_execution": "tool_execution", END: END}
+ )
+
+ # Compile the graph
+ try:
+     app = workflow.compile()
+     print("LangGraph compiled successfully.")
+ except Exception as e:
+     print(f"Error compiling LangGraph workflow: {e}")
+     import traceback
+     traceback.print_exc()
+     app = None
+
+ # --- Agent Definition using LangGraph ---
+ class GaiaAgent:
      def __init__(self):
+         print("GaiaAgent initialized.")
+         if app is None or llm_with_tools is None:
+             raise RuntimeError("Agent failed to initialize due to missing API keys or graph compilation error.")
+         self.agent_app = app  # Store the compiled graph
+         self.system_prompt = system_prompt  # Store the system prompt
+
+     # Modified __call__ to accept task_id
+     def __call__(self, task_id: str, question: str) -> str:
+         print(f"\nAgent received question for Task ID {task_id}: {question}")
+
+         if self.agent_app is None:
+             print("Agent app is not compiled. Cannot process question.")
+             return "Agent failed to initialize."
+
+         try:
+             # Run the LangGraph workflow. The initial state holds the system
+             # prompt, the human message, and the task_id.
+             initial_messages = [
+                 SystemMessage(content=self.system_prompt),  # System prompt guidance
+                 HumanMessage(content=question)
+             ]
+             print("Invoking LangGraph agent...")
+             # Pass task_id in the initial state
+             final_state = self.agent_app.invoke(
+                 {"messages": initial_messages, "task_id": task_id},
+                 # config={"recursion_limit": 50}  # Optional: increase recursion depth
+             )
+             print("LangGraph agent finished.")
+             # print(f"Final state messages: {final_state['messages']}")  # Can be verbose
+
+             # Extract the final answer from the last AIMessage
+             final_message = final_state['messages'][-1]
+             if isinstance(final_message, AIMessage):
+                 final_answer = final_message.content
+                 print(f"Agent returning answer: {final_answer}")
+                 return final_answer
+             else:
+                 # The agent didn't converge on a final AIMessage (e.g., it ended
+                 # right after a tool call or the routing was incorrect). Surface
+                 # whatever content is available from the last message.
+                 error_message = f"Agent finished in an unexpected state. Last message type: {type(final_message)}. Content: {final_message}"
+                 print(error_message)
+                 content_attempt = str(getattr(final_message, 'content', final_message))
+                 return f"Agent finished without a final answer message. Last output: {content_attempt[:500]}"
+
+         except Exception as e:
+             print(f"Error during agent execution: {e}")
+             import traceback
+             traceback.print_exc()
+             return f"An error occurred during agent execution: {e}"
+
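A local smoke test for the class (hypothetical task ID; requires OPENAI_API_KEY and TAVILY_API_KEY to be set; not part of the commit) might look like:

    agent = GaiaAgent()
    print(agent("demo-task-id", "What is the capital of France?"))
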
+
+ # --- Rest of the original code (kept mostly as is) ---
 
  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
+     Fetches all questions, runs the GaiaAgent on them, submits all answers,
      and displays the results.
      """
+     space_id = os.getenv("SPACE_ID")
 
      if profile:
          username= f"{profile.username}"
 
          print("User not logged in.")
          return "Please Login to Hugging Face with the button.", None
 
+     if missing_vars:
+         return f"Missing required environment variables: {', '.join(missing_vars)}. Please set them in your Space settings.", None
+
      api_url = DEFAULT_API_URL
      questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"
 
+     # 1. Instantiate Agent
      try:
+         agent = GaiaAgent()
+         print("GaiaAgent instantiated successfully.")
      except Exception as e:
          print(f"Error instantiating agent: {e}")
+         import traceback
+         traceback.print_exc()
+         return f"Error initializing agent: {e}\nCheck your environment variables.", None
+
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local development environment - No Space ID"
+     print(f"Agent code link: {agent_code}")
 
      # 2. Fetch Questions
      print(f"Fetching questions from: {questions_url}")
      try:
+         response = requests.get(questions_url, timeout=30)
          response.raise_for_status()
          questions_data = response.json()
          if not questions_data:
 
      results_log = []
      answers_payload = []
      print(f"Running agent on {len(questions_data)} questions...")
+     start_time = time.time()
+     question_counter = 0
+
      for item in questions_data:
+         question_counter += 1
          task_id = item.get("task_id")
          question_text = item.get("question")
+         print(f"\n--- Processing Task ID: {task_id} ({question_counter}/{len(questions_data)}) ---")
+         print(f"Question: {question_text[:150]}...")
+
          if not task_id or question_text is None:
              print(f"Skipping item with missing task_id or question: {item}")
+             results_log.append({"Task ID": "N/A", "Question": str(item), "Submitted Answer": "Skipped (missing data)"})
              continue
+
+         submitted_answer = "Agent failed to run."  # Default in case of crash
          try:
+             # Pass task_id to the agent's call method
+             submitted_answer = agent(task_id, question_text)
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
+             print(f"FATAL ERROR running agent on task {task_id}: {e}")
+             import traceback
+             traceback.print_exc()
+             error_answer = f"AGENT CRASHED on task {task_id}: {e}"
+             answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})  # Submit error answer
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
+
+     end_time = time.time()
+     total_duration = end_time - start_time
+     print(f"\nFinished running agent on {len(questions_data)} questions in {total_duration:.2f} seconds.")
 
      if not answers_payload:
          print("Agent did not produce any answers to submit.")
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
+     # 4. Prepare Submission
      submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished running. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)
 
      # 5. Submit
      print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
      try:
+         response = requests.post(submit_url, json=submission_data, timeout=180)  # Increased timeout further
          response.raise_for_status()
          result_data = response.json()
          final_status = (
 
 
  # --- Build Gradio Interface using Blocks ---
  with gr.Blocks() as demo:
+     gr.Markdown("# GAIA Level 1 Agent Evaluation Runner")
      gr.Markdown(
          """
          **Instructions:**
 
+         1. **Clone this space.**
+         2. **Set Environment Variables:** Go to the Space settings and add your `OPENAI_API_KEY` and `TAVILY_API_KEY` as secret variables. These are required for the agent to use the LLM and search tools.
+         3. Modify the `GaiaAgent` code (above) to improve its reasoning, tool use, and prompt engineering.
+         4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         5. Click 'Run Evaluation & Submit All Answers' to fetch questions (with task IDs), run your agent (which can now download files via the API using the task ID), submit answers, and see the score.
 
          ---
+         **Agent Details:**
+         This agent uses `langgraph` and `langchain`. It has access to:
+         * Web search via Tavily (`tavily_search`).
+         * Listing local files (`list_local_files`).
+         * Reading local files (`read_local_file`).
+         * **Downloading and reading files provided with a task via the evaluation API (`download_and_read_file`), using the task ID.**
+         * A capable LLM (OpenAI's `gpt-4o-mini` by default; requires `OPENAI_API_KEY`).
+
          **Disclaimers:**
+         Running the evaluation can take several minutes as the agent processes each question. The provided agent is a starting point; significant prompt engineering, tool refinement, and potentially more sophisticated graph logic will be needed to reliably score above 30%.
          """
      )
 
      run_button = gr.Button("Run Evaluation & Submit All Answers")
 
      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
      run_button.click(
          fn=run_and_submit_all,
 
  if __name__ == "__main__":
      print("\n" + "-"*30 + " App Starting " + "-"*30)
      space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")
 
      if space_host_startup:
          print(f"✅ SPACE_HOST found: {space_host_startup}")
 
      else:
          print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
+     if space_id_startup:
          print(f"✅ SPACE_ID found: {space_id_startup}")
          print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
          print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
      else:
          print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
+     if missing_vars:
+         print("\n" + "="*50)
+         print("WARNING: Launching interface but required API keys are missing.")
+         print("Set OPENAI_API_KEY and TAVILY_API_KEY in environment variables.")
+         print("The agent WILL FAIL without these.")
+         print("="*50 + "\n")
+
      print("-"*(60 + len(" App Starting ")) + "\n")
 
+     print("Launching Gradio Interface for GAIA Level 1 Agent Evaluation...")
      demo.launch(debug=True, share=False)
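
For reference, the scoring-API round trip the app performs can be reproduced in a few lines (endpoints taken from the code above; the response fields are assumed to match how the app reads them):

    import requests

    base = "https://agents-course-unit4-scoring.hf.space"
    questions = requests.get(f"{base}/questions", timeout=30).json()
    print(questions[0]["task_id"], questions[0]["question"][:80])
    # Submission shape used by the app:
    # {"username": ..., "agent_code": ..., "answers": [{"task_id": ..., "submitted_answer": ...}]}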