sirine1712 committed on
Commit
151223b
·
verified ·
1 Parent(s): 0dd84e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +449 -138
app.py CHANGED
@@ -5,207 +5,518 @@ import pandas as pd
5
  from smolagents import ToolCallingAgent, tool
6
  from duckduckgo_search import DDGS
7
  import math
8
- from datetime import datetime
9
  import re
 
 
 
10
 
11
  # --- Enhanced Tools ---
12
  @tool
13
def enhanced_search(query: str, num_results: int = 3) -> str:
    """Performs web search with result filtering and quality checks.

    A result is kept only when its snippet is longer than 30 characters and
    its title does not contain a promotional keyword as a whole word.

    Args:
        query: The search query string to look up.
        num_results: Number of results to return (default 3).

    Returns:
        A formatted string containing the search results or error message.
    """
    # Whole-word matching: a plain substring test on 'ad' / 'buy' also
    # rejected unrelated titles such as "Madrid" or "Download".
    promo_pattern = re.compile(
        r"\b(?:advertisements?|sponsored|ads?|buy)\b", re.IGNORECASE
    )
    try:
        with DDGS() as ddgs:
            # `or []` keeps the comprehension safe if the client returns None.
            results = ddgs.text(query, max_results=num_results) or []
            filtered = [
                f"## {r['title']}\n{r['body']}\nURL: {r['href']}"
                for r in results
                if len(r['body']) > 30 and not promo_pattern.search(r['title'])
            ]
        return "\n\n".join(filtered) if filtered else "No quality results found."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Search error: {e}"
37
 
38
  @tool
39
def scientific_calculator(expression: str) -> str:
    """Evaluates mathematical expressions with scientific functions.

    Every public name of the ``math`` module is available (``sqrt``, ``pi``,
    ``sin``, ...), plus the builtins ``abs``, ``round``, ``min``, ``max`` and
    ``sum``. Float results are rounded to 6 decimal places.

    Args:
        expression: The mathematical expression to evaluate.

    Returns:
        The result as a string or error message.
    """
    allowed_names = {k: v for k, v in math.__dict__.items() if not k.startswith("__")}
    # Builtins are disabled below, so re-expose the harmless numeric helpers.
    # `pow` is intentionally left as math.pow to keep existing float results.
    allowed_names.update(
        {"abs": abs, "round": round, "min": min, "max": max, "sum": sum}
    )
    try:
        # SECURITY NOTE(review): eval() with emptied builtins is not a real
        # sandbox; attribute-walking expressions can still escape it. Only
        # acceptable while the expression source is trusted.
        result = eval(expression, {"__builtins__": {}}, allowed_names)
        return str(round(result, 6)) if isinstance(result, float) else str(result)
    except Exception as e:
        return f"Calculation error: {e}"
54
 
55
  @tool
56
def get_current_date() -> str:
    """Gets the current date and time.

    Returns:
        The local date and time formatted as YYYY-MM-DD HH:MM:SS.
    """
    now = datetime.now()
    return f"{now:%Y-%m-%d %H:%M:%S}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  @tool
65
def unit_converter(amount: float, from_unit: str, to_unit: str) -> str:
    """Converts between common measurement units.

    Supported pairs (both directions, case-insensitive): miles/kilometers,
    pounds/kilograms, fahrenheit/celsius.

    Args:
        amount: The numerical value to convert.
        from_unit: The source unit (e.g., 'miles').
        to_unit: The target unit (e.g., 'kilometers').

    Returns:
        The converted value with unit or error message.
    """
    conversions = {
        ('miles', 'kilometers'): lambda x: x * 1.60934,
        ('kilometers', 'miles'): lambda x: x / 1.60934,
        ('pounds', 'kilograms'): lambda x: x * 0.453592,
        ('kilograms', 'pounds'): lambda x: x / 0.453592,
        ('fahrenheit', 'celsius'): lambda x: (x - 32) * 5 / 9,
        ('celsius', 'fahrenheit'): lambda x: x * 9 / 5 + 32,
    }
    key = (from_unit.strip().lower(), to_unit.strip().lower())
    convert = conversions.get(key)
    if convert is None:
        # The arrow was mis-encoded in the scraped source; restored to U+2192.
        return f"Unsupported conversion: {from_unit} \u2192 {to_unit}"
    try:
        result = convert(float(amount))
    except (TypeError, ValueError):
        # float() rejects None and non-numeric strings.
        return "Invalid amount"
    return f"{round(result, 4)} {to_unit}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- # --- Agent Core ---
92
class GAIAAgent:
    """Tool-calling agent wrapper that cleans questions and trims answers.

    Each call strips citation markers from the question, runs the wrapped
    agent, reduces the response to its last date or number when one is
    present, and records the (question, answer) pair in ``session_history``.
    """

    def __init__(self):
        # NOTE(review): smolagents may expect a model object rather than a
        # model-name string here, and `max_iterations` may not be an accepted
        # keyword in every version -- confirm against the installed release.
        self.agent = ToolCallingAgent(
            name="GAIA-HF-Agent",
            description="Specialized agent for GAIA tasks",
            tools=[enhanced_search, scientific_calculator, get_current_date, unit_converter],
            model="gpt-4-turbo",  # or "gpt-3.5-turbo" if unavailable
            planning_interval=5,
            max_iterations=10
        )
        self.session_history = []

    def preprocess_question(self, question: str) -> str:
        """Clean GAIA questions.

        Removes bracketed numeric citations such as ``[12]`` and the literal
        option markers ``(a)`` and ``(b)``, then trims surrounding whitespace.
        """
        question = re.sub(r'\[\d+\]', '', question)  # Remove citations
        question = question.replace("(a)", "").replace("(b)", "")  # Remove options
        return question.strip()

    def postprocess_answer(self, answer: str) -> str:
        """Extract most precise answer.

        Preference order: the last ISO date (YYYY-MM-DD) in the text, then the
        last number, then the text itself truncated to 500 characters.
        """
        # The agent may hand back a non-string result; normalise before regex use.
        text = "" if answer is None else str(answer)

        dates = re.findall(r'\d{4}-\d{2}-\d{2}', text)
        if dates:
            return dates[-1]

        # Require digits after the decimal point so a sentence-ending period
        # ("... is 42.") is not glued onto the number.
        numbers = re.findall(r'\d+(?:\.\d+)?', text)
        if numbers:
            return numbers[-1]

        return text[:500]  # Limit length

    def __call__(self, question: str) -> str:
        """Answer ``question``; failures are returned as an "Agent error" string."""
        clean_q = self.preprocess_question(question)
        print(f"Processing: {clean_q}")

        try:
            answer = self.agent.run(clean_q)
            processed = self.postprocess_answer(answer)
            # History keeps the original (uncleaned) question text.
            self.session_history.append((question, processed))
            return processed
        except Exception as e:
            return f"Agent error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- # --- HF Space Integration ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def run_and_submit(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer them with ``GAIAAgent`` and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user is
            not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
        the processed answers, or None when nothing was processed.
    """
    if not profile:
        return "Please log in to submit", None

    space_id = os.getenv("SPACE_ID")

    # Surface a construction failure as a status message instead of raising.
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"Agent initialization error: {e}", None

    # Fetch questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
        # Report HTTP errors directly rather than trying to parse an error page.
        response.raise_for_status()
        questions = response.json()
        if not questions:
            return "No questions received", None
    except Exception as e:
        return f"Failed to get questions: {e}", None

    # Process questions
    results = []
    answers = []
    for item in questions[:20]:  # Limit to 20 for testing
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue

        answer = agent(question)
        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        answers.append({
            "task_id": task_id,
            "submitted_answer": answer
        })

    # Submit answers
    try:
        response = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={
                "username": profile.username,
                "agent_code": f"https://huggingface.co/spaces/{space_id}",
                "answers": answers
            },
            timeout=60
        )
        response.raise_for_status()
        data = response.json()
        # The check-mark emoji was mis-encoded in the scraped source; restored.
        return (
            f"\u2705 Submitted {len(answers)} answers\n"
            f"Score: {data.get('score', 'N/A')}%\n"
            f"Correct: {data.get('correct_count', '?')}/{data.get('total_attempted', '?')}\n"
            f"Message: {data.get('message', '')}",
            pd.DataFrame(results))
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results)
192
 
193
- # --- Gradio UI ---
194
# Gradio page: login button, one action button, and two output widgets.
with gr.Blocks(title="GAIA Agent") as demo:
    # The rocket emoji was mis-encoded in the scraped source; restored.
    gr.Markdown("## \U0001F680 GAIA Task Agent")
    gr.Markdown("Login and click submit to run evaluation")

    login = gr.LoginButton()
    submit_btn = gr.Button("Run & Submit Answers", variant="primary")

    status = gr.Textbox(label="Submission Status", interactive=False)
    results = gr.DataFrame(label="Processed Answers")

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    # NOTE(review): presumably Gradio supplies it from the type annotation --
    # confirm against the Gradio OAuth documentation.
    submit_btn.click(
        fn=run_and_submit,
        inputs=None,
        outputs=[status, results]
    )

if __name__ == "__main__":
    demo.launch(debug=True)
 
5
  from smolagents import ToolCallingAgent, tool
6
  from duckduckgo_search import DDGS
7
  import math
8
+ import openai
9
  import re
10
+ import json
11
+ from datetime import datetime, timedelta
12
+ import time
13
 
14
  # --- Enhanced Tools ---
15
  @tool
16
def duck_search(query: str) -> str:
    """
    Searches the web using DuckDuckGo and returns detailed information.

    Args:
        query: The search query string.

    Returns:
        A string with up to five numbered results (title, snippet and URL),
        "No results found." when the search is empty, or a "Search error"
        message on failure.
    """
    try:
        with DDGS() as ddgs:
            # Materialise the results so the emptiness check below is reliable
            # even if the client hands back a lazy iterator or None.
            results = list(ddgs.text(query, max_results=5) or [])

        if not results:
            return "No results found."

        # .get() keeps one incomplete result from discarding the whole search.
        formatted_results = [
            f"Result {i}:\n"
            f"Title: {r.get('title', '')}\n"
            f"Content: {r.get('body', '')}\n"
            f"URL: {r.get('href', '')}\n"
            f"---"
            for i, r in enumerate(results, 1)
        ]
        return "\n".join(formatted_results)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Search error: {e}"
44
 
45
  @tool
46
def focused_search(query: str, topic: str = "") -> str:
    """
    Performs a more focused search with specific keywords for better results.

    The topic is appended to the query; if that combined search returns
    nothing, the bare query is tried once.

    Args:
        query: The main search query
        topic: Additional topic context to improve search accuracy

    Returns:
        Up to three results (title, snippet and source URL) separated by blank
        lines, or a message when nothing was found or the search failed.
    """
    try:
        # Enhance query with topic context
        enhanced_query = f"{query} {topic}".strip()

        with DDGS() as ddgs:
            results = list(ddgs.text(enhanced_query, max_results=3) or [])
            # Retry only when the topic actually changed the query; repeating
            # an identical search cannot produce a different outcome.
            if not results and enhanced_query != query:
                results = list(ddgs.text(query, max_results=3) or [])

        if not results:
            return "No results found for focused search."

        summaries = [
            f"**{r.get('title', '')}**\n{r.get('body', '')}\nSource: {r.get('href', '')}"
            for r in results
        ]
        return "\n\n".join(summaries)
    except Exception as e:
        return f"Focused search error: {e}"
77
 
78
  @tool
79
def advanced_calculator(expression: str) -> str:
    """
    Enhanced calculator with support for complex mathematical operations.

    All public names of the ``math`` module are available, together with
    ``abs``, ``round``, ``min``, ``max``, ``sum`` and the builtin ``pow``.
    If the expression fails to evaluate and contains ``%``, it is retried
    with every ``%`` read as "divided by 100".

    Args:
        expression: A mathematical expression or calculation

    Returns:
        The calculated result as a string, or a "Calculation error" message
    """

    def _format(value) -> str:
        # Whole floats print as integers; other floats use 10 significant digits.
        if isinstance(value, float):
            if value.is_integer():
                return str(int(value))
            return f"{value:.10g}"
        return str(value)

    def _namespace() -> dict:
        # Fresh mapping per evaluation. Dunder entries of `math` are skipped
        # and "__builtins__" is set last so it can never be overridden.
        names = {k: v for k, v in vars(math).items() if not k.startswith("__")}
        names.update({
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "pow": pow,
        })
        names["__builtins__"] = {}
        return names

    expression = str(expression).strip()
    try:
        # SECURITY NOTE(review): eval() with emptied builtins is not a real
        # sandbox; only acceptable while the expression source is trusted.
        return _format(eval(expression, _namespace()))
    except Exception as e:
        # Try to handle percentage calculations such as "50% * 200".
        if "%" in expression:
            try:
                return _format(eval(expression.replace("%", "/100"), _namespace()))
            except Exception:
                pass  # fall through and report the original error

        return f"Calculation error: {e}. Please check the mathematical expression."
129
 
130
  @tool
131
def date_calculator(date_expression: str) -> str:
    """
    Calculates dates, time differences, and handles date-related queries.

    Recognised phrases (case-insensitive, singular or plural):
    "N day(s) ago", "N day(s) from now", "N week(s) ago", "N week(s) from now",
    "today" and "current date". Anything else returns the current date.

    Args:
        date_expression: A date calculation or query

    Returns:
        The resulting date formatted as "YYYY-MM-DD (Weekday)", the same
        prefixed with "Current date: " when no phrase is recognised, or a
        "Date calculation error" message
    """
    try:
        current_date = datetime.now()
        text = date_expression.lower()

        # Checked in order; the regexes alone decide, so singular forms
        # ("1 day ago") are matched too.
        relative_rules = (
            (r'(\d+)\s*days?\s*ago', "days", -1),
            (r'(\d+)\s*days?\s*from\s*now', "days", 1),
            (r'(\d+)\s*weeks?\s*ago', "weeks", -1),
            (r'(\d+)\s*weeks?\s*from\s*now', "weeks", 1),
        )
        for pattern, unit, sign in relative_rules:
            match = re.search(pattern, text)
            if match:
                offset = timedelta(**{unit: sign * int(match.group(1))})
                return (current_date + offset).strftime("%Y-%m-%d (%A)")

        # Current date info
        if "today" in text or "current date" in text:
            return current_date.strftime("%Y-%m-%d (%A)")

        return f"Current date: {current_date.strftime('%Y-%m-%d (%A)')}"

    except Exception as e:
        # Covers non-string input and offsets too large for timedelta.
        return f"Date calculation error: {e}"
174
 
175
+ @tool
176
def text_analyzer(text: str) -> str:
    """
    Analyzes text for patterns, extracts information, and provides insights.

    Args:
        text: The text to analyze

    Returns:
        A report with word, character and sentence counts, followed by any
        numbers (first 10), dates and email addresses found in the text
    """
    try:
        if not text:
            return "No text provided for analysis."

        # Basic statistics
        word_count = len(text.split())
        char_count = len(text)
        # Sentences end on '.', '!' or '?'; empty fragments are ignored.
        sentence_count = len([s for s in re.split(r'[.!?]', text) if s.strip()])

        # Extract numbers (optional sign and decimal part)
        numbers = re.findall(r'-?\d+(?:\.\d+)?', text)

        # Extract dates: D/M/Y-style or Y/M/D-style with '/' or '-' separators
        date_patterns = re.findall(
            r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b',
            text,
        )

        # Extract emails. The TLD class is [A-Za-z]; a '|' inside a character
        # class is a literal pipe, not alternation.
        emails = re.findall(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text
        )

        report = [
            "Text Analysis:",
            f"- Words: {word_count}",
            f"- Characters: {char_count}",
            f"- Sentences: {sentence_count}",
        ]

        if numbers:
            overflow = '...' if len(numbers) > 10 else ''
            report.append(f"- Numbers found: {', '.join(numbers[:10])}{overflow}")

        if date_patterns:
            report.append(f"- Dates found: {', '.join(date_patterns)}")

        if emails:
            report.append(f"- Emails found: {', '.join(emails)}")

        # Every report line, including the last, ends with a newline.
        return "\n".join(report) + "\n"

    except Exception as e:
        return f"Text analysis error: {e}"
222
 
223
+ # --- Enhanced Agent ---
224
class ImprovedWebSearchAgent:
    """Tool-calling agent wrapper with prompt enrichment and a text fallback.

    Calling an instance wraps the question in step-by-step instructions, runs
    the underlying agent, tidies the result, and on any exception returns a
    keyword-based explanatory message instead of raising.
    """

    def __init__(self):
        """Initialize the enhanced agent with better reasoning capabilities."""

        # Use more powerful model if available
        model_name = "gpt-4o-mini"  # Fallback to gpt-3.5-turbo if needed

        # Enhanced system prompt for better reasoning
        system_prompt = """You are an advanced AI assistant designed to solve complex problems by breaking them down systematically.

Key capabilities:
1. **Multi-step Reasoning**: Break complex problems into smaller, manageable steps
2. **Information Synthesis**: Combine information from multiple sources
3. **Verification**: Double-check calculations and facts
4. **Context Awareness**: Understand the broader context of questions

Problem-solving approach:
1. Analyze the question carefully to understand what's being asked
2. Identify what information you need to find
3. Use available tools strategically (search, calculate, analyze)
4. Verify your findings and reasoning
5. Provide a clear, accurate answer

When using tools:
- Use focused_search for specific factual information
- Use duck_search for broader context
- Use advanced_calculator for any mathematical operations
- Use date_calculator for time-related queries
- Use text_analyzer when you need to extract information from text

Always think step-by-step and explain your reasoning process."""

        # NOTE(review): smolagents may expect a model object rather than a
        # model-name string -- confirm against the installed release.
        # Emoji in the log messages were mis-encoded in the scraped source
        # and are restored here.
        try:
            self.agent = self._build_agent(model_name, system_prompt)
            print(f"\u2705 Enhanced agent initialized with {model_name}")
        except Exception:
            print(f"\u26a0\ufe0f Error initializing with {model_name}, trying fallback...")
            try:
                self.agent = self._build_agent("gpt-3.5-turbo", system_prompt)
                print("\u2705 Enhanced agent initialized with gpt-3.5-turbo")
            except Exception as e2:
                print(f"\u274c Agent initialization failed: {e2}")
                raise

    @staticmethod
    def _build_agent(model_name: str, system_prompt: str):
        """Create the underlying ToolCallingAgent for the given model name."""
        return ToolCallingAgent(
            name="ImprovedGAIAAgent",
            description=system_prompt,
            tools=[duck_search, focused_search, advanced_calculator, date_calculator, text_analyzer],
            model=model_name,
            planning_interval=3,  # More frequent planning
        )

    def __call__(self, question: str) -> str:
        """
        Process a question with enhanced reasoning and error handling.

        Args:
            question: The question to answer

        Returns:
            The post-processed agent answer, or a fallback message when the
            agent raises
        """
        print(f"\U0001F50D Processing question: {question}")

        try:
            # Enhance the question with context clues
            enhanced_question = self._enhance_question(question)

            # Only the elapsed time is measured; no timeout is enforced here.
            start_time = time.time()
            result = self.agent.run(enhanced_question)
            elapsed_time = time.time() - start_time
            print(f"\u23f1\ufe0f Question processed in {elapsed_time:.1f} seconds")

            # Post-process the result
            return self._post_process_answer(result, question)

        except Exception as e:
            print(f"\u274c Agent error: {e}")
            # Try a simpler approach as fallback
            return self._fallback_answer(question, str(e))

    def _enhance_question(self, question: str) -> str:
        """Add context and instructions to improve question processing."""

        enhanced = f"""Please solve this step by step:

Question: {question}

Instructions:
1. Read the question carefully and identify what type of answer is needed
2. Break down complex problems into steps
3. Use the available tools to gather information or perform calculations
4. Verify your answer makes sense
5. Provide a clear, concise final answer

If this is a factual question, search for current information.
If this involves calculations, show your work.
If this requires multiple steps, explain each step clearly."""

        return enhanced

    def _post_process_answer(self, result: str, original_question: str) -> str:
        """Clean and improve the agent's response.

        Empty results are replaced by a request for more information. When the
        text carries no "final answer" / "answer:" marker and its longest line
        exceeds 20 characters, that line is appended as "Final Answer: ...".
        """
        # The agent may return a non-string (e.g. a number); normalise first.
        result = "" if result is None else str(result).strip()

        # Only a truly empty result counts as missing: short answers such as
        # "42" or "Paris" are valid and must be passed through.
        if not result:
            return f"I need more information to properly answer: {original_question}"

        # Ensure we have a clear answer
        lowered = result.lower()
        if "final answer" not in lowered and "answer:" not in lowered:
            # Look for the most substantive line as the answer
            best_line = max(result.split('\n'), key=len, default=result)
            if len(best_line) > 20:
                result = f"{result}\n\nFinal Answer: {best_line}"

        return result

    def _fallback_answer(self, question: str, error: str) -> str:
        """Provide a fallback response when the main agent fails.

        The message is chosen by the first keyword group found in the
        lower-cased question: math, then date/time, then factual.
        """
        question_lower = question.lower()

        def mentions(keywords) -> bool:
            return any(word in question_lower for word in keywords)

        # Try simple keyword-based responses for common question types
        if mentions(['calculate', 'math', '+', '-', '*', '/', 'equals']):
            return f"This appears to be a mathematical question. Error occurred: {error}. Please verify the calculation manually."

        if mentions(['when', 'date', 'year', 'time']):
            return f"This appears to be a date/time related question. Error occurred: {error}. Please search for current information."

        if mentions(['who', 'what', 'where', 'how']):
            return f"This appears to be a factual question. Error occurred: {error}. Please search for current information."

        return f"I encountered an error while processing your question: {error}. Please try rephrasing your question."
376
+
377
+ # --- Constants ---
378
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# --- Evaluation & Submission ---
def _shorten(text: str, limit: int) -> str:
    """Return ``text`` cut to ``limit`` characters with "..." appended if cut."""
    return text[:limit] + "..." if len(text) > limit else text


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every fetched question with the agent and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user is
            not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame log
        of the (truncated) questions and answers, or None when the run stopped
        before any question was processed.
    """
    # Emoji in the log/status strings were mis-encoded in the scraped source
    # and are restored here.
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = profile.username
        print(f"\U0001F464 User: {username}")
    else:
        return "Please login to Hugging Face.", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = ImprovedWebSearchAgent()
    except Exception as e:
        return f"Agent initialization error: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            return "No questions received.", None

        print(f"\U0001F4DD Received {len(questions)} questions")

    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    results_log = []
    answers_payload = []

    for i, item in enumerate(questions, 1):
        task_id = item.get("task_id")
        question = item.get("question")

        if not task_id or not question:
            continue

        print(f"\n\U0001F4CB Processing question {i}/{len(questions)}: {task_id}")

        try:
            answer = agent(question)

            # The agent may return a non-string; normalise so the length
            # checks and slicing below cannot raise and lose the answer.
            answer = "" if answer is None else str(answer)

            # Ensure answer is not empty
            if len(answer.strip()) < 2:
                answer = "Unable to determine answer from available information."

            results_log.append({
                "Task ID": task_id,
                "Question": _shorten(question, 100),
                "Submitted Answer": _shorten(answer, 200)
            })

            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": answer
            })

            print(f"\u2705 Answer generated for {task_id}")

        except Exception as e:
            error_msg = f"Agent error: {str(e)[:100]}"
            print(f"\u274c Error for {task_id}: {error_msg}")

            results_log.append({
                "Task ID": task_id,
                "Question": _shorten(question, 100),
                "Submitted Answer": error_msg
            })

            # A placeholder is still submitted so the task is not skipped.
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": "Error processing question"
            })

    if not answers_payload:
        return "No answers were generated.", pd.DataFrame(results_log)

    print(f"\n\U0001F680 Submitting {len(answers_payload)} answers...")

    try:
        response = requests.post(submit_url, json={
            "username": username.strip(),
            "agent_code": agent_code,
            "answers": answers_payload
        }, timeout=120)  # Increased timeout

        response.raise_for_status()
        result = response.json()

        score = result.get('score', 0)
        correct_count = result.get('correct_count', 0)
        total_attempted = result.get('total_attempted', len(answers_payload))

        status = (
            f"\u2705 Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {score}% ({correct_count}/{total_attempted} correct)\n"
            f"Message: {result.get('message', 'No message')}\n"
            f"Total questions processed: {len(questions)}"
        )

        print(f"\U0001F3AF Final Score: {score}%")

        return status, pd.DataFrame(results_log)

    except Exception as e:
        error_msg = f"\u274c Submission failed: {e}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)
492
+
493
+ # --- UI ---
494
# Gradio page: description, login button, one action button, two outputs.
# Emoji in the user-facing text were mis-encoded in the scraped source and
# are restored here.
with gr.Blocks(title="Enhanced GAIA Agent") as demo:
    gr.Markdown("# \U0001F916 Enhanced GAIA Agent with Advanced Reasoning")
    gr.Markdown("""
**Improvements in this version:**
- \U0001F9E0 Enhanced multi-step reasoning capabilities
- \U0001F50D Multiple specialized search tools
- \U0001F9EE Advanced calculator with better math support
- \U0001F4C5 Date and time calculation tools
- \U0001F4DD Text analysis capabilities
- \u26a1 Better error handling and fallback mechanisms
- \U0001F3AF Optimized for GAIA benchmark performance
""")

    gr.LoginButton()

    with gr.Row():
        run_btn = gr.Button("\U0001F680 Run Enhanced Evaluation & Submit", variant="primary", scale=2)

    status_box = gr.Textbox(label="\U0001F4CA Status & Results", lines=8, interactive=False)
    result_table = gr.DataFrame(label="\U0001F4CB Agent Answers Log", interactive=False)

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    # NOTE(review): presumably Gradio supplies it from the type annotation --
    # confirm against the Gradio OAuth documentation.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_box, result_table],
        show_progress=True
    )

if __name__ == "__main__":
    demo.launch(debug=True, share=False)