AdityaPandey committed on
Commit
88e500c
·
verified ·
1 Parent(s): 2a61e41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -511
app.py CHANGED
@@ -1,464 +1,54 @@
 
1
  import os
 
2
  import gradio as gr
 
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- import json
7
- import mimetypes
8
- import glob
9
- import time # Added for timing
10
-
11
- # LangChain/LangGraph imports
12
- from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
13
- from langchain_core.prompts import ChatPromptTemplate
14
- from langchain_core.runnables import RunnablePassthrough
15
- from langchain_core.tools import tool
16
- from langchain_openai import ChatOpenAI
17
- from langgraph.graph import END, StateGraph
18
- from langgraph.prebuilt import ToolExecutor
19
- from langchain_core.utils.function_calling import format_tool_to_openai_function # Helper for tool formatting if needed
20
 
21
- # Tool Imports
22
- from langchain_community.tools.tavily_search import TavilySearchResults
23
- from langchain_community.document_loaders import TextLoader, PyPDFLoader # Import specific loaders
24
- from pathlib import Path # For file path handling
25
 
 
26
  # --- Constants ---
27
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
- FILE_DOWNLOAD_BASE_URL = f"{DEFAULT_API_URL}/files"
29
- # Define a directory where files might be downloaded or located
30
- DATA_DIR = "./data" # Use this as a base for downloaded files too
31
-
32
- # --- Ensure Data Directory Exists ---
33
- os.makedirs(DATA_DIR, exist_ok=True)
34
- print(f"Data directory '{DATA_DIR}' ensured.")
35
-
36
- # --- Environment Variable Check ---
37
- required_env_vars = ["OPENAI_API_KEY", "TAVILY_API_KEY"]
38
- missing_vars = [var for var in required_env_vars if not os.getenv(var)]
39
-
40
- if missing_vars:
41
- print("\n" + "="*50)
42
- print("ERROR: Missing required environment variables!")
43
- print("Please set the following environment variables:")
44
- for var in missing_vars:
45
- print(f"- {var}")
46
- print(f"\nThey should be set in the Hugging Face Space settings under 'Variables'.")
47
- print("You will not be able to run the agent without these.")
48
- print("="*50 + "\n")
49
- # In a real app, you might raise an error or disable functionality
50
-
51
- # --- Define Tools ---
52
-
53
- @tool
54
- def list_local_files(directory: str = DATA_DIR) -> str:
55
- """List files available locally in a directory. Defaults to the DATA_DIR."""
56
- try:
57
- # List all files and directories, but focus on files
58
- # Use ** to search recursively within task_id subdirectories as well
59
- entries = glob.glob(os.path.join(directory, '**/*'), recursive=True)
60
- files = [entry for entry in entries if os.path.isfile(entry)]
61
- if not files:
62
- return f"No files found in local directory: {directory} (and subdirectories)"
63
- # Return paths relative to DATA_DIR for clarity
64
- file_list = "\n".join([os.path.relpath(f, DATA_DIR) for f in files])
65
- return f"Files available locally in {directory} (and subdirectories):\n{file_list}"
66
- except Exception as e:
67
- return f"Error listing local files in {directory}: {e}"
68
-
69
- @tool
70
- def read_local_file(filepath: str, directory: str = DATA_DIR) -> str:
71
- """Read the content of a specified local file within a directory. Supports .txt and .pdf. Filepath should be relative to the directory (e.g., task_id/filename.txt). Defaults to the DATA_DIR."""
72
- # Sanitize filepath to prevent directory traversal
73
- base_path = Path(directory).resolve()
74
- full_path = (base_path / filepath).resolve()
75
-
76
- # Check if the resolved path is actually within the intended base directory
77
- try:
78
- full_path.relative_to(base_path)
79
- except ValueError:
80
- return f"Error: Access denied. Filepath '{filepath}' is outside the allowed directory '{directory}'."
81
-
82
-
83
- if not full_path.exists():
84
- return f"Error: Local file not found at {full_path}"
85
- if not full_path.is_file():
86
- return f"Error: {filepath} is not a local file."
87
-
88
- try:
89
- mime_type, _ = mimetypes.guess_type(full_path)
90
- print(f"Attempting to read file {full_path} with mime type {mime_type}")
91
-
92
- if mime_type == 'application/pdf':
93
- loader = PyPDFLoader(str(full_path))
94
- docs = loader.load()
95
- content = "\n".join([doc.page_content for doc in docs])
96
- return f"Content of {filepath} (PDF):\n{content[:4000]}..." # Increased limit slightly
97
- elif mime_type and mime_type.startswith('text/'):
98
- loader = TextLoader(str(full_path))
99
- docs = loader.load()
100
- content = "\n".join([doc.page_content for doc in docs])
101
- return f"Content of {filepath} (Text):\n{content[:4000]}..." # Increased limit slightly
102
- else:
103
- # Fallback for other types - try reading as text
104
- try:
105
- loader = TextLoader(str(full_path))
106
- docs = loader.load()
107
- content = "\n".join([doc.page_content for doc in docs])
108
- return f"Content of {filepath} (Attempted Text Read):\n{content[:4000]}..." # Increased limit slightly
109
- except Exception:
110
- return f"Error: Unsupported local file type for {filepath}. Mime type: {mime_type}. Cannot read content as text."
111
-
112
- except Exception as e:
113
- return f"Error reading local file {filepath}: {e}"
114
-
115
-
116
- @tool
117
- def download_and_read_file(task_id: str, filename: str) -> str:
118
- """Downloads a file provided for a specific task ID from the evaluation API and then reads its content. Saves files to ./data/{task_id}/"""
119
- download_url = f"{FILE_DOWNLOAD_BASE_URL}/{task_id}"
120
- save_dir = os.path.join(DATA_DIR, str(task_id))
121
- os.makedirs(save_dir, exist_ok=True) # Ensure task-specific directory exists
122
- save_path = os.path.join(save_dir, filename)
123
-
124
- print(f"Attempting to download file '{filename}' for task '{task_id}' from {download_url}")
125
-
126
- try:
127
- # The API endpoint /files/{task_id} seems to return a zip or archive
128
- # Let's assume for now it returns the *specific* file if requested via query param or it handles internally?
129
- # The prompt mentions GET /files/{task_id} which implies the task_id is the key,
130
- # but the example question didn't specify how files are listed or referenced beyond their name.
131
- # A common GAIA pattern is the API returning a list of file *metadata* or an archive.
132
- # Let's adjust: Assume the API at /files/{task_id} returns a ZIP containing *all* files for that task.
133
- # We need to download the zip, extract it, and *then* read the specific file.
134
-
135
- # Revised tool logic: Download the task_id archive, extract it, then read the requested filename.
136
- archive_url = f"{FILE_DOWNLOAD_BASE_URL}/{task_id}"
137
- print(f"Downloading archive for task {task_id} from {archive_url}")
138
- response = requests.get(archive_url, stream=True, timeout=30) # Increased timeout
139
- response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
140
-
141
- archive_path = os.path.join(save_dir, f"task_{task_id}_files.zip")
142
- with open(archive_path, 'wb') as f:
143
- for chunk in response.iter_content(chunk_size=8192):
144
- f.write(chunk)
145
- print(f"Downloaded archive to {archive_path}")
146
-
147
- # Extract the archive
148
- import zipfile
149
- extract_dir = os.path.join(save_dir, "extracted")
150
- os.makedirs(extract_dir, exist_ok=True)
151
- with zipfile.ZipFile(archive_path, 'r') as zip_ref:
152
- zip_ref.extractall(extract_dir)
153
- print(f"Extracted archive to {extract_dir}")
154
-
155
- # Now, find the requested filename within the extracted directory
156
- # Use glob to find the file, case-insensitively and allowing for potential path variations inside the zip
157
- search_pattern = os.path.join(extract_dir, '**', filename)
158
- found_files = glob.glob(search_pattern, recursive=True, include_hidden=True)
159
-
160
- if not found_files:
161
- return f"Error: File '{filename}' not found within the archive for task {task_id} after extraction to {extract_dir}. Files found: {list_local_files(extract_dir)}" # List files found in extracted dir
162
-
163
- if len(found_files) > 1:
164
- print(f"Warning: Found multiple files matching '{filename}' in archive for task {task_id}. Using the first one: {found_files[0]}")
165
-
166
- file_to_read_path = found_files[0] # Use the first match
167
-
168
- # Now read the extracted file using the existing read logic
169
- # We can reuse parts of read_local_file or just inline it
170
- print(f"Attempting to read extracted file: {file_to_read_path}")
171
- mime_type, _ = mimetypes.guess_type(file_to_read_path)
172
- print(f"Detected mime type: {mime_type}")
173
-
174
- content = ""
175
- if mime_type == 'application/pdf':
176
- loader = PyPDFLoader(str(file_to_read_path))
177
- docs = loader.load()
178
- content = "\n".join([doc.page_content for doc in docs])
179
- elif mime_type and mime_type.startswith('text/'):
180
- loader = TextLoader(str(file_to_read_path))
181
- docs = loader.load()
182
- content = "\n".join([doc.page_content for doc in docs])
183
- else:
184
- # Fallback for other types - try reading as text
185
- try:
186
- loader = TextLoader(str(file_to_read_path))
187
- docs = loader.load()
188
- content = "\n".join([doc.page_content for doc in docs])
189
- except Exception:
190
- return f"Error: Unsupported file type for '{filename}' ({file_to_read_path}). Mime type: {mime_type}. Cannot read content after download and extraction."
191
-
192
-
193
- return f"Successfully downloaded and read '{filename}' for task {task_id}. Content:\n{content[:4000]}..." # Limit output size
194
-
195
- except requests.exceptions.RequestException as e:
196
- return f"Error downloading archive for task {task_id}: {e}"
197
- except zipfile.BadZipFile:
198
- return f"Error: Downloaded file for task {task_id} is not a valid zip archive."
199
- except FileNotFoundError:
200
- return f"Error: The file '{filename}' was expected but not found in the extracted archive for task {task_id}."
201
- except Exception as e:
202
- import traceback
203
- traceback.print_exc()
204
- return f"An unexpected error occurred during download or reading of '{filename}' for task {task_id}: {e}"
205
-
206
-
207
- # Initialize Tavily Search Tool
208
- tavily_tool = TavilySearchResults(max_results=5)
209
-
210
- # List of tools available to the agent
211
- # Added the new download_and_read_file tool
212
- tools = [tavily_tool, list_local_files, read_local_file, download_and_read_file]
213
-
214
- # --- Define the LangGraph Agent ---
215
-
216
- # Define the state for the graph
217
- # Added 'task_id' to the state
218
- class AgentState:
219
- messages: list
220
- task_id: str | None = None # Store the current task ID
221
-
222
- # Define nodes
223
- def call_llm(state: AgentState):
224
- """Invokes the LLM to make a decision or generate a response."""
225
- messages = state['messages']
226
- task_id = state.get('task_id') # Get task_id from state if available
227
- print(f"\n---Calling LLM (Task ID: {task_id})---")
228
- # Provide task_id in the prompt if helpful? Or rely on tool signature?
229
- # Relying on tool signature is better. The LLM should understand it needs task_id for the download tool.
230
-
231
- # Add a system message to guide the agent? Optional but often helpful.
232
- # We can bind this to the LLM later or add it here dynamically.
233
- # Let's add a basic system message when invoking the graph.
234
- # For now, just invoke with the current messages.
235
 
236
- response = llm_with_tools.invoke(messages)
237
- print(f"LLM Response type: {type(response)}")
238
- print(f"LLM Response: {response}")
239
- return {'messages': messages + [response], 'task_id': task_id}
240
 
241
- def call_tool(state: AgentState):
242
- """Executes the tools specified by the LLM."""
243
- messages = state['messages']
244
- task_id = state.get('task_id') # Get task_id from state
245
- last_message = messages[-1]
246
- print(f"\n---Executing Tools (Task ID: {task_id})---")
247
 
248
- tool_outputs = []
249
- # Ensure the last message is a tool call message
250
- if not hasattr(last_message, 'tool_calls') or not last_message.tool_calls:
251
- print("Last message was not a tool call message. This shouldn't happen if routing is correct.")
252
- # Return state unchanged or with an error message? Returning unchanged might lead to loop.
253
- # Let's add a message indicating the issue.
254
- tool_outputs.append(ToolMessage(content="Agent attempted to call tools but the last message didn't contain tool calls.", tool_call_id="error"))
255
- return {'messages': messages + tool_outputs, 'task_id': task_id}
256
-
257
-
258
- for tool_call in last_message.tool_calls:
259
- print(f"Executing tool: {tool_call.tool} with args: {tool_call.args} (Call ID: {tool_call.id})")
260
- try:
261
- # The tool_executor.invoke expects the message *containing* the tool calls
262
- # It handles calling the correct tool function based on the message content
263
- output = tool_executor.invoke(last_message)
264
- # The output structure from ToolExecutor.invoke(message_with_tool_calls)
265
- # can be complex (e.g., list of dicts or messages).
266
- # A simpler approach is to manually call the tool functions using tool_call.args
267
- # and construct ToolMessage with the corresponding tool_call.id.
268
-
269
- # Find the tool function by name
270
- tool_function = next((t for t in tools if t.name == tool_call.tool), None)
271
- if tool_function:
272
- # Execute the tool function with parsed arguments
273
- # Be careful: tool_call.args is a dict, need to unpack it
274
- # Ensure args match tool signature, this relies on LLM's correctness
275
- print(f"Attempting to call {tool_call.tool}(**{tool_call.args})")
276
- # Need to handle potential errors in unpacking args or tool execution
277
- try:
278
- # Pass task_id to the tool args if the tool expects it
279
- # The download_and_read_file tool expects task_id and filename
280
- # The LLM *must* include task_id in the args when calling this tool
281
- if 'task_id' in tool_call.args and tool_call.tool == 'download_and_read_file':
282
- tool_result = tool_function.invoke(tool_call.args) # Use invoke for LangChain tools
283
- else:
284
- # For other tools, just pass the args provided by the LLM
285
- tool_result = tool_function.invoke(tool_call.args) # Use invoke
286
-
287
- tool_outputs.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call.id))
288
- print(f"Tool '{tool_call.tool}' output: {tool_result[:200]}...")
289
- except Exception as e:
290
- print(f"Error executing tool {tool_call.tool} with args {tool_call.args}: {e}")
291
- import traceback
292
- traceback.print_exc()
293
- tool_outputs.append(ToolMessage(content=f"Error executing {tool_call.tool}: {e}", tool_call_id=tool_call.id))
294
- else:
295
- print(f"Error: Tool '{tool_call.tool}' not found.")
296
- tool_outputs.append(ToolMessage(content=f"Error: Tool '{tool_call.tool}' not found.", tool_call_id=tool_call.id))
297
-
298
- except Exception as e:
299
- print(f"An unexpected error occurred during tool execution for {tool_call.tool} (ID: {tool_call.id}): {e}")
300
- import traceback
301
- traceback.print_exc()
302
- tool_outputs.append(ToolMessage(content=f"An unexpected error occurred during tool execution for {tool_call.tool}: {e}", tool_call_id=tool_call.id))
303
-
304
-
305
- # Append tool outputs to the messages
306
- # Each output needs to be linked back to the tool call ID
307
- return {'messages': messages + tool_outputs, 'task_id': task_id}
308
-
309
- # Define conditional edge logic
310
- def route_tools(state: AgentState):
311
- """Routes the agent based on whether the LLM decided to use a tool."""
312
- last_message = state['messages'][-1]
313
- print(f"\n---Routing (Task ID: {state.get('task_id')})---")
314
- print(f"Last message type: {type(last_message)}")
315
- print(f"Last message content: {last_message.content}")
316
- print(f"Last message tool_calls: {hasattr(last_message, 'tool_calls') and last_message.tool_calls}")
317
-
318
- # If the last message has tool calls, route to the tool execution node
319
- if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
320
- print("Routing to tool_execution")
321
- return 'tool_execution'
322
- # Otherwise, route to the end node (assuming it's a final answer)
323
- print("Routing to end")
324
- return END
325
-
326
- # Initialize the LLM and bind tools
327
- try:
328
- # Add a system prompt to guide the LLM
329
- system_prompt = (
330
- "You are a helpful AI assistant designed to answer complex real-world questions based on provided tools."
331
- "You have access to web search and the ability to list and read local files, including downloading files provided with a task."
332
- "Follow these steps:"
333
- "1. Carefully analyze the user's question."
334
- "2. Break down the question into smaller steps required to find the answer."
335
- "3. Use the available tools (`tavily_search`, `list_local_files`, `read_local_file`, `download_and_read_file`) to gather necessary information."
336
- " - If the question mentions files and you have the task_id available in your state, use the `download_and_read_file` tool with the correct `task_id` and `filename` to get file contents."
337
- " - If files are already downloaded or you need to see what's available locally, use `list_local_files`."
338
- " - Use `read_local_file` to read files already on disk (e.g., after downloading or if pre-loaded)."
339
- " - Use `tavily_search` for general knowledge or looking up information online."
340
- "4. Synthesize the information gathered from the tools."
341
- "5. Formulate the final answer based *only* on the information you have retrieved or reasoned about."
342
- "6. Ensure the answer is in the exact format requested by the user (e.g., comma-separated list, specific ordering)."
343
- "7. If you have sufficient information and the answer is ready, provide the final answer. Otherwise, use tools or ask for clarification if necessary (though for this benchmark, you must attempt to answer)."
344
- "Do NOT make up information."
345
- )
346
-
347
- llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) # gpt-4o-mini is cheaper and often sufficient
348
- llm_with_tools = llm.bind_tools(tools)
349
- print("LLM and tools initialized.")
350
- except Exception as e:
351
- print(f"Error initializing LLM or binding tools: {e}")
352
- llm = None
353
- llm_with_tools = None
354
-
355
-
356
- # Initialize the ToolExecutor (though we are manually invoking tools in call_tool node)
357
- # We keep this mainly for reference or potential future use if we switch back
358
- tool_executor = ToolExecutor(tools)
359
- print("Tool executor initialized.")
360
-
361
- # Build the graph
362
- workflow = StateGraph(AgentState)
363
-
364
- # Add nodes
365
- workflow.add_node("llm", call_llm)
366
- workflow.add_node("tool_execution", call_tool)
367
-
368
- # Set the entry point
369
- workflow.set_entry_point("llm")
370
-
371
- # Add edges
372
- workflow.add_edge("tool_execution", "llm") # After executing tools, always go back to the LLM to decide next step
373
-
374
- # Add conditional edge from LLM
375
- workflow.add_conditional_edge(
376
- "llm",
377
- route_tools,
378
- {"tool_execution": "tool_execution", END: END}
379
- )
380
-
381
- # Compile the graph
382
- try:
383
- app = workflow.compile()
384
- print("LangGraph compiled successfully.")
385
- except Exception as e:
386
- print(f"Error compiling LangGraph workflow: {e}")
387
- import traceback
388
- traceback.print_exc()
389
- app = None
390
-
391
- # --- Agent Definition using LangGraph ---
392
- class GaiaAgent:
393
  def __init__(self):
394
- print("GaiaAgent initialized.")
395
- if app is None or llm_with_tools is None:
396
- raise RuntimeError("Agent failed to initialize due to missing API keys or graph compilation error.")
397
- self.agent_app = app # Store the compiled graph
398
- self.system_prompt = system_prompt # Store the system prompt
399
-
400
- # Modified __call__ to accept task_id
401
- def __call__(self, task_id: str, question: str) -> str:
402
- print(f"\nAgent received question for Task ID {task_id}: {question}")
403
-
404
- if self.agent_app is None:
405
- print("Agent app is not compiled. Cannot process question.")
406
- return "Agent failed to initialize."
407
-
408
- try:
409
- # Run the LangGraph workflow
410
- # The initial state includes the system prompt, the human message, and the task_id
411
- initial_messages = [
412
- AIMessage(content=self.system_prompt), # Start with System prompt guidance (as an AI message often works well with tool calling)
413
- HumanMessage(content=question)
414
- ]
415
- print("Invoking LangGraph agent...")
416
- # Pass task_id in the initial state
417
- final_state = self.agent_app.invoke(
418
- {"messages": initial_messages, "task_id": task_id},
419
- # config={"recursion_limit": 50} # Optional: increase recursion depth
420
- )
421
- print("LangGraph agent finished.")
422
- # print(f"Final state messages: {final_state['messages']}") # Can be verbose
423
-
424
- # Extract the final answer from the last AIMessage
425
- final_message = final_state['messages'][-1]
426
- if isinstance(final_message, AIMessage):
427
- final_answer = final_message.content
428
- print(f"Agent returning answer: {final_answer}")
429
- return final_answer
430
- else:
431
- # If the last message isn't an AIMessage, it means the agent didn't converge
432
- # or the routing is incorrect. Return the last message content or an error.
433
- # This might happen if the agent ends after a tool call and doesn't produce a final AIMessage.
434
- error_message = f"Agent finished in an unexpected state. Last message type: {type(final_message)}. Content: {final_message}"
435
- print(error_message)
436
- # Attempt to extract *any* useful content from the last message if not AIMessage
437
- content_attempt = ""
438
- if hasattr(final_message, 'content'):
439
- content_attempt = str(final_message.content)
440
- elif hasattr(final_message, 'tool_outputs'):
441
- content_attempt = "Tool Outputs: " + json.dumps(final_message.tool_outputs)
442
- else:
443
- content_attempt = str(final_message)
444
-
445
- return f"Agent finished without a final answer message. Last output: {content_attempt[:500]}"
446
-
447
- except Exception as e:
448
- print(f"Error during agent execution: {e}")
449
- import traceback
450
- traceback.print_exc()
451
- return f"An error occurred during agent execution: {e}"
452
-
453
-
454
- # --- Rest of the original code (kept mostly as is) ---
455
 
456
  def run_and_submit_all( profile: gr.OAuthProfile | None):
457
  """
458
- Fetches all questions, runs the GaiaAgent on them, submits all answers,
459
  and displays the results.
460
  """
461
- space_id = os.getenv("SPACE_ID")
 
462
 
463
  if profile:
464
  username= f"{profile.username}"
@@ -467,30 +57,24 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
467
  print("User not logged in.")
468
  return "Please Login to Hugging Face with the button.", None
469
 
470
- if missing_vars:
471
- return f"Missing required environment variables: {', '.join(missing_vars)}. Please set them in your Space settings.", None
472
-
473
  api_url = DEFAULT_API_URL
474
  questions_url = f"{api_url}/questions"
475
  submit_url = f"{api_url}/submit"
476
 
477
- # 1. Instantiate Agent
478
  try:
479
- agent = GaiaAgent()
480
- print("GaiaAgent instantiated successfully.")
481
  except Exception as e:
482
  print(f"Error instantiating agent: {e}")
483
- import traceback
484
- traceback.print_exc()
485
- return f"Error initializing agent: {e}\nCheck your environment variables.", None
486
-
487
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local development environment - No Space ID"
488
- print(f"Agent code link: {agent_code}")
489
 
490
  # 2. Fetch Questions
491
  print(f"Fetching questions from: {questions_url}")
492
  try:
493
- response = requests.get(questions_url, timeout=30)
494
  response.raise_for_status()
495
  questions_data = response.json()
496
  if not questions_data:
@@ -512,52 +96,33 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
512
  results_log = []
513
  answers_payload = []
514
  print(f"Running agent on {len(questions_data)} questions...")
515
- start_time = time.time()
516
- question_counter = 0
517
-
518
  for item in questions_data:
519
- question_counter += 1
520
  task_id = item.get("task_id")
521
  question_text = item.get("question")
522
- print(f"\n--- Processing Task ID: {task_id} ({question_counter}/{len(questions_data)}) ---")
523
- print(f"Question: {question_text[:150]}...")
524
-
525
  if not task_id or question_text is None:
526
  print(f"Skipping item with missing task_id or question: {item}")
527
- results_log.append({"Task ID": "N/A", "Question": str(item), "Submitted Answer": "Skipped (missing data)"})
528
  continue
529
-
530
- submitted_answer = "Agent failed to run." # Default in case of crash
531
  try:
532
- # Pass task_id to the agent's call method
533
- submitted_answer = agent(task_id, question_text)
534
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
535
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
536
  except Exception as e:
537
- print(f"FATAL ERROR running agent on task {task_id}: {e}")
538
- import traceback
539
- traceback.print_exc()
540
- error_answer = f"AGENT CRASHED on task {task_id}: {e}"
541
- answers_payload.append({"task_id": task_id, "submitted_answer": error_answer}) # Submit error answer
542
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
543
-
544
- end_time = time.time()
545
- total_duration = end_time - start_time
546
- print(f"\nFinished running agent on {len(questions_data)} questions in {total_duration:.2f} seconds.")
547
 
548
  if not answers_payload:
549
  print("Agent did not produce any answers to submit.")
550
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
551
 
552
- # 4. Prepare Submission
553
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
554
- status_update = f"Agent finished running. Submitting {len(answers_payload)} answers for user '{username}'..."
555
  print(status_update)
556
 
557
  # 5. Submit
558
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
559
  try:
560
- response = requests.post(submit_url, json=submission_data, timeout=180) # Increased timeout further
561
  response.raise_for_status()
562
  result_data = response.json()
563
  final_status = (
@@ -600,28 +165,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
600
 
601
  # --- Build Gradio Interface using Blocks ---
602
  with gr.Blocks() as demo:
603
- gr.Markdown("# GAIA Level 1 Agent Evaluation Runner")
604
  gr.Markdown(
605
  """
606
  **Instructions:**
607
-
608
- 1. **Clone this space.**
609
- 2. **Set Environment Variables:** Go to the Space settings and add your `OPENAI_API_KEY` and `TAVILY_API_KEY` as secret variables. These are required for the agent to use the LLM and search tools.
610
- 3. Modify the `GaiaAgent` code (above) to improve its reasoning, tool use, and prompt engineering.
611
- 4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
612
- 5. Click 'Run Evaluation & Submit All Answers' to fetch questions (with task IDs), run your agent (which can now download files via the API using the task ID), submit answers, and see the score.
613
-
614
  ---
615
- **Agent Details:**
616
- This agent uses `langgraph` and `langchain`. It has access to:
617
- * Web search via Tavily (`tavily_search`).
618
- * Listing local files (`list_local_files`).
619
- * Reading local files (`read_local_file`).
620
- * **Downloading and reading files provided with a task via the evaluation API (`download_and_read_file`), using the task ID.**
621
- * A capable LLM (OpenAI's `gpt-4o-mini` by default, requires `OPENAI_API_KEY`).
622
-
623
  **Disclaimers:**
624
- Running the evaluation can take several minutes as the agent processes each question. The provided agent is a starting point; significant prompt engineering, tool refinement, and potentially more sophisticated graph logic will be needed to reliably score above 30%.
 
625
  """
626
  )
627
 
@@ -630,7 +184,8 @@ with gr.Blocks() as demo:
630
  run_button = gr.Button("Run Evaluation & Submit All Answers")
631
 
632
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
633
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, row_selectable=False)
 
634
 
635
  run_button.click(
636
  fn=run_and_submit_all,
@@ -639,8 +194,9 @@ with gr.Blocks() as demo:
639
 
640
  if __name__ == "__main__":
641
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
642
  space_host_startup = os.getenv("SPACE_HOST")
643
- space_id_startup = os.getenv("SPACE_ID")
644
 
645
  if space_host_startup:
646
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -648,22 +204,14 @@ if __name__ == "__main__":
648
  else:
649
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
650
 
651
- if space_id_startup:
652
  print(f"✅ SPACE_ID found: {space_id_startup}")
653
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
654
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
655
  else:
656
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
657
 
658
- if missing_vars:
659
- print("\n" + "="*50)
660
- print("WARNING: Launching interface but required API keys are missing.")
661
- print("Set OPENAI_API_KEY and TAVILY_API_KEY in environment variables.")
662
- print("The agent WILL FAIL without these.")
663
- print("="*50 + "\n")
664
-
665
-
666
  print("-"*(60 + len(" App Starting ")) + "\n")
667
 
668
- print("Launching Gradio Interface for GAIA Level 1 Agent Evaluation...")
669
  demo.launch(debug=True, share=False)
 
1
+ """ Basic Agent Evaluation Runner"""
2
  import os
3
+ import inspect
4
  import gradio as gr
5
+ import re
6
  import requests
 
7
  import pandas as pd
8
+ from langchain_core.messages import HumanMessage
9
+ from agent import build_graph
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
11
 
12
+ # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
18
 
 
 
 
 
 
 
19
 
20
+ class BasicAgent:
21
+ """A langgraph agent."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def __init__(self):
23
+ print("BasicAgent initialized.")
24
+ self.graph = build_graph()
25
+
26
+ def __call__(self, question: str) -> str:
27
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
28
+ # Wrap the question in a HumanMessage from langchain_core
29
+ messages = [HumanMessage(content=question)]
30
+ messages = self.graph.invoke({"messages": messages})
31
+ answer = messages['messages'][-1].content
32
+ return parse_final_answer(answer)
33
+
34
def parse_final_answer(answer: str) -> str:
    """Extract the text that follows the ``FINAL ANSWER:`` marker.

    The marker is matched case-insensitively and the last occurrence wins,
    so a response that quotes the template while reasoning and then gives
    its real answer at the end is parsed correctly. Only the remainder of
    the line after the marker (skipping any whitespace, including a line
    break directly after the marker) is returned.

    Args:
        answer: The raw text of the agent's response.

    Returns:
        The stripped answer text after the last marker, or the whole
        stripped response when no marker is present. ``""`` is returned
        when ``answer`` is ``None``.
    """
    if answer is None:
        return ""

    markers = list(re.finditer(r"FINAL ANSWER:", answer, flags=re.IGNORECASE))
    if not markers:
        return answer.strip()

    # Skip whitespace after the marker, then keep the rest of that line.
    tail = answer[markers[-1].end():]
    return re.match(r"\s*(.*)", tail).group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def run_and_submit_all( profile: gr.OAuthProfile | None):
46
  """
47
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
48
  and displays the results.
49
  """
50
+ # --- Determine HF Space Runtime URL and Repo URL ---
51
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
52
 
53
  if profile:
54
  username= f"{profile.username}"
 
57
  print("User not logged in.")
58
  return "Please Login to Hugging Face with the button.", None
59
 
 
 
 
60
  api_url = DEFAULT_API_URL
61
  questions_url = f"{api_url}/questions"
62
  submit_url = f"{api_url}/submit"
63
 
64
+ # 1. Instantiate Agent ( modify this part to create your agent)
65
  try:
66
+ agent = BasicAgent()
 
67
  except Exception as e:
68
  print(f"Error instantiating agent: {e}")
69
+ return f"Error initializing agent: {e}", None
70
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
71
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
72
+ print(agent_code)
 
 
73
 
74
  # 2. Fetch Questions
75
  print(f"Fetching questions from: {questions_url}")
76
  try:
77
+ response = requests.get(questions_url, timeout=15)
78
  response.raise_for_status()
79
  questions_data = response.json()
80
  if not questions_data:
 
96
  results_log = []
97
  answers_payload = []
98
  print(f"Running agent on {len(questions_data)} questions...")
 
 
 
99
  for item in questions_data:
 
100
  task_id = item.get("task_id")
101
  question_text = item.get("question")
 
 
 
102
  if not task_id or question_text is None:
103
  print(f"Skipping item with missing task_id or question: {item}")
 
104
  continue
 
 
105
  try:
106
+ submitted_answer = agent(question_text)
 
107
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
108
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
109
  except Exception as e:
110
+ print(f"Error running agent on task {task_id}: {e}")
111
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
112
 
113
  if not answers_payload:
114
  print("Agent did not produce any answers to submit.")
115
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
116
 
117
+ # 4. Prepare Submission
118
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
119
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
120
  print(status_update)
121
 
122
  # 5. Submit
123
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
124
  try:
125
+ response = requests.post(submit_url, json=submission_data, timeout=60)
126
  response.raise_for_status()
127
  result_data = response.json()
128
  final_status = (
 
165
 
166
  # --- Build Gradio Interface using Blocks ---
167
  with gr.Blocks() as demo:
168
+ gr.Markdown("# Basic Agent Evaluation Runner")
169
  gr.Markdown(
170
  """
171
  **Instructions:**
172
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
173
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
174
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
 
175
  ---
 
 
 
 
 
 
 
 
176
  **Disclaimers:**
177
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
178
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
179
  """
180
  )
181
 
 
184
  run_button = gr.Button("Run Evaluation & Submit All Answers")
185
 
186
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
187
+ # Removed max_rows=10 from DataFrame constructor
188
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
189
 
190
  run_button.click(
191
  fn=run_and_submit_all,
 
194
 
195
  if __name__ == "__main__":
196
  print("\n" + "-"*30 + " App Starting " + "-"*30)
197
+ # Check for SPACE_HOST and SPACE_ID at startup for information
198
  space_host_startup = os.getenv("SPACE_HOST")
199
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
200
 
201
  if space_host_startup:
202
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
204
  else:
205
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
206
 
207
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
208
  print(f"✅ SPACE_ID found: {space_id_startup}")
209
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
210
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
211
  else:
212
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
213
 
 
 
 
 
 
 
 
 
214
  print("-"*(60 + len(" App Starting ")) + "\n")
215
 
216
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
217
  demo.launch(debug=True, share=False)