sirine1712 commited on
Commit
61a207f
·
verified ·
1 Parent(s): 151223b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -439
app.py CHANGED
@@ -5,518 +5,182 @@ import pandas as pd
5
  from smolagents import ToolCallingAgent, tool
6
  from duckduckgo_search import DDGS
7
  import math
8
- import openai
9
  import re
10
- import json
11
  from datetime import datetime, timedelta
12
  import time
13
 
14
- # --- Enhanced Tools ---
15
  @tool
16
  def duck_search(query: str) -> str:
17
- """
18
- Searches the web using DuckDuckGo and returns detailed information.
19
 
20
  Args:
21
  query: The search query string.
22
 
23
  Returns:
24
- A string with comprehensive search results including titles, snippets, and URLs.
25
  """
26
  try:
27
  with DDGS() as ddgs:
28
- results = ddgs.text(query, max_results=5) # Increased results
29
- if not results:
30
- return "No results found."
31
-
32
- formatted_results = []
33
- for i, r in enumerate(results, 1):
34
- formatted_results.append(
35
- f"Result {i}:\n"
36
- f"Title: {r['title']}\n"
37
- f"Content: {r['body']}\n"
38
- f"URL: {r['href']}\n"
39
- f"---"
40
- )
41
- return "\n".join(formatted_results)
42
- except Exception as e:
43
- return f"Search error: {e}"
44
-
45
- @tool
46
- def focused_search(query: str, topic: str = "") -> str:
47
- """
48
- Performs a more focused search with specific keywords for better results.
49
-
50
- Args:
51
- query: The main search query
52
- topic: Additional topic context to improve search accuracy
53
-
54
- Returns:
55
- Focused search results
56
- """
57
- try:
58
- # Enhance query with topic context
59
- enhanced_query = f"{query} {topic}".strip()
60
-
61
- with DDGS() as ddgs:
62
- results = ddgs.text(enhanced_query, max_results=3)
63
- if not results:
64
- # Try alternative search if no results
65
- results = ddgs.text(query, max_results=3)
66
-
67
- if not results:
68
- return "No results found for focused search."
69
-
70
- summaries = []
71
- for r in results:
72
- summaries.append(f"**{r['title']}**\n{r['body']}\nSource: {r['href']}")
73
-
74
- return "\n\n".join(summaries)
75
  except Exception as e:
76
- return f"Focused search error: {e}"
77
 
78
  @tool
79
  def advanced_calculator(expression: str) -> str:
80
- """
81
- Enhanced calculator with support for complex mathematical operations.
82
 
83
  Args:
84
- expression: A mathematical expression or calculation
85
 
86
  Returns:
87
- The calculated result with detailed steps when possible
88
  """
89
  try:
90
- # Clean the expression
91
- expression = expression.strip()
92
-
93
- # Handle common mathematical functions and constants
94
- safe_dict = {
95
- "__builtins__": {},
96
- **math.__dict__,
97
- "abs": abs,
98
- "round": round,
99
- "min": min,
100
- "max": max,
101
- "sum": sum,
102
- "pow": pow,
103
- }
104
-
105
- # Try to evaluate the expression
106
- result = eval(expression, safe_dict)
107
-
108
- # Format the result nicely
109
- if isinstance(result, float):
110
- if result.is_integer():
111
- return str(int(result))
112
- else:
113
- return f"{result:.10g}" # Remove trailing zeros
114
-
115
  return str(result)
116
-
117
- except Exception as e:
118
- # Try to handle percentage calculations
119
- if "%" in expression:
120
- try:
121
- # Convert percentage expressions
122
- expr_mod = expression.replace("%", "/100")
123
- result = eval(expr_mod, safe_dict)
124
- return str(result)
125
- except:
126
- pass
127
-
128
- return f"Calculation error: {e}. Please check the mathematical expression."
129
-
130
- @tool
131
- def date_calculator(date_expression: str) -> str:
132
- """
133
- Calculates dates, time differences, and handles date-related queries.
134
-
135
- Args:
136
- date_expression: A date calculation or query
137
-
138
- Returns:
139
- The calculated date or time difference
140
- """
141
- try:
142
- current_date = datetime.now()
143
-
144
- # Handle relative date expressions
145
- if "days ago" in date_expression.lower():
146
- days_match = re.search(r'(\d+)\s*days?\s*ago', date_expression.lower())
147
- if days_match:
148
- days = int(days_match.group(1))
149
- target_date = current_date - timedelta(days=days)
150
- return target_date.strftime("%Y-%m-%d (%A)")
151
-
152
- elif "days from now" in date_expression.lower():
153
- days_match = re.search(r'(\d+)\s*days?\s*from\s*now', date_expression.lower())
154
- if days_match:
155
- days = int(days_match.group(1))
156
- target_date = current_date + timedelta(days=days)
157
- return target_date.strftime("%Y-%m-%d (%A)")
158
-
159
- elif "weeks ago" in date_expression.lower():
160
- weeks_match = re.search(r'(\d+)\s*weeks?\s*ago', date_expression.lower())
161
- if weeks_match:
162
- weeks = int(weeks_match.group(1))
163
- target_date = current_date - timedelta(weeks=weeks)
164
- return target_date.strftime("%Y-%m-%d (%A)")
165
-
166
- # Current date info
167
- elif "today" in date_expression.lower() or "current date" in date_expression.lower():
168
- return current_date.strftime("%Y-%m-%d (%A)")
169
-
170
- return f"Current date: {current_date.strftime('%Y-%m-%d (%A)')}"
171
-
172
- except Exception as e:
173
- return f"Date calculation error: {e}"
174
-
175
- @tool
176
- def text_analyzer(text: str) -> str:
177
- """
178
- Analyzes text for patterns, extracts information, and provides insights.
179
-
180
- Args:
181
- text: The text to analyze
182
-
183
- Returns:
184
- Analysis results including word count, patterns, and extracted information
185
- """
186
- try:
187
- if not text:
188
- return "No text provided for analysis."
189
-
190
- # Basic statistics
191
- word_count = len(text.split())
192
- char_count = len(text)
193
- sentence_count = len([s for s in text.split('.') if s.strip()])
194
-
195
- # Extract numbers
196
- numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
197
-
198
- # Extract dates
199
- date_patterns = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b', text)
200
-
201
- # Extract emails
202
- emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
203
-
204
- analysis = f"Text Analysis:\n"
205
- analysis += f"- Words: {word_count}\n"
206
- analysis += f"- Characters: {char_count}\n"
207
- analysis += f"- Sentences: {sentence_count}\n"
208
-
209
- if numbers:
210
- analysis += f"- Numbers found: {', '.join(numbers[:10])}{'...' if len(numbers) > 10 else ''}\n"
211
-
212
- if date_patterns:
213
- analysis += f"- Dates found: {', '.join(date_patterns)}\n"
214
-
215
- if emails:
216
- analysis += f"- Emails found: {', '.join(emails)}\n"
217
-
218
- return analysis
219
-
220
  except Exception as e:
221
- return f"Text analysis error: {e}"
222
 
223
- # --- Enhanced Agent ---
224
- class ImprovedWebSearchAgent:
225
  def __init__(self):
226
- """Initialize the enhanced agent with better reasoning capabilities."""
227
-
228
- # Use more powerful model if available
229
- model_name = "gpt-4o-mini" # Fallback to gpt-3.5-turbo if needed
230
-
231
- # Enhanced system prompt for better reasoning
232
- system_prompt = """You are an advanced AI assistant designed to solve complex problems by breaking them down systematically.
233
-
234
- Key capabilities:
235
- 1. **Multi-step Reasoning**: Break complex problems into smaller, manageable steps
236
- 2. **Information Synthesis**: Combine information from multiple sources
237
- 3. **Verification**: Double-check calculations and facts
238
- 4. **Context Awareness**: Understand the broader context of questions
239
-
240
- Problem-solving approach:
241
- 1. Analyze the question carefully to understand what's being asked
242
- 2. Identify what information you need to find
243
- 3. Use available tools strategically (search, calculate, analyze)
244
- 4. Verify your findings and reasoning
245
- 5. Provide a clear, accurate answer
246
-
247
- When using tools:
248
- - Use focused_search for specific factual information
249
- - Use duck_search for broader context
250
- - Use advanced_calculator for any mathematical operations
251
- - Use date_calculator for time-related queries
252
- - Use text_analyzer when you need to extract information from text
253
-
254
- Always think step-by-step and explain your reasoning process."""
255
-
256
  try:
257
  self.agent = ToolCallingAgent(
258
- name="ImprovedGAIAAgent",
259
- description=system_prompt,
260
- tools=[duck_search, focused_search, advanced_calculator, date_calculator, text_analyzer],
261
- model=model_name,
262
- planning_interval=3, # More frequent planning
263
  )
264
- print(f"✅ Enhanced agent initialized with {model_name}")
265
  except Exception as e:
266
- print(f"⚠️ Error initializing with {model_name}, trying fallback...")
267
- try:
268
- self.agent = ToolCallingAgent(
269
- name="ImprovedGAIAAgent",
270
- description=system_prompt,
271
- tools=[duck_search, focused_search, advanced_calculator, date_calculator, text_analyzer],
272
- model="gpt-3.5-turbo",
273
- planning_interval=3,
274
- )
275
- print("✅ Enhanced agent initialized with gpt-3.5-turbo")
276
- except Exception as e2:
277
- print(f"❌ Agent initialization failed: {e2}")
278
- raise e2
279
 
280
  def __call__(self, question: str) -> str:
281
- """
282
- Process a question with enhanced reasoning and error handling.
283
-
284
- Args:
285
- question: The question to answer
286
-
287
- Returns:
288
- A comprehensive answer
289
- """
290
- print(f"🔍 Processing question: {question}")
291
-
292
  try:
293
- # Add some preprocessing to understand question type
294
- question_lower = question.lower()
295
-
296
- # Enhance the question with context clues
297
- enhanced_question = self._enhance_question(question)
298
-
299
- # Run the agent with timeout protection
300
- start_time = time.time()
301
- max_time = 120 # 2 minutes max per question
302
 
303
- result = self.agent.run(enhanced_question)
 
304
 
305
- elapsed_time = time.time() - start_time
306
- print(f"⏱️ Question processed in {elapsed_time:.1f} seconds")
307
-
308
- # Post-process the result
309
- final_answer = self._post_process_answer(result, question)
310
-
311
- return final_answer
312
 
313
  except Exception as e:
314
- print(f" Agent error: {e}")
315
- # Try a simpler approach as fallback
316
- return self._fallback_answer(question, str(e))
317
-
318
- def _enhance_question(self, question: str) -> str:
319
- """Add context and instructions to improve question processing."""
320
-
321
- enhanced = f"""Please solve this step by step:
322
-
323
- Question: {question}
324
-
325
- Instructions:
326
- 1. Read the question carefully and identify what type of answer is needed
327
- 2. Break down complex problems into steps
328
- 3. Use the available tools to gather information or perform calculations
329
- 4. Verify your answer makes sense
330
- 5. Provide a clear, concise final answer
331
-
332
- If this is a factual question, search for current information.
333
- If this involves calculations, show your work.
334
- If this requires multiple steps, explain each step clearly."""
335
-
336
- return enhanced
337
-
338
- def _post_process_answer(self, result: str, original_question: str) -> str:
339
- """Clean and improve the agent's response."""
340
-
341
- if not result or len(result.strip()) < 10:
342
- return f"I need more information to properly answer: {original_question}"
343
-
344
- # Clean up the response
345
- result = result.strip()
346
-
347
- # Ensure we have a clear answer
348
- if "final answer" not in result.lower() and "answer:" not in result.lower():
349
- # Try to extract the most relevant part
350
- lines = result.split('\n')
351
- if lines:
352
- # Look for the most substantive line as the answer
353
- best_line = max(lines, key=len, default=result)
354
- if len(best_line) > 20:
355
- result = f"{result}\n\nFinal Answer: {best_line}"
356
-
357
- return result
358
-
359
- def _fallback_answer(self, question: str, error: str) -> str:
360
- """Provide a fallback response when the main agent fails."""
361
-
362
- question_lower = question.lower()
363
-
364
- # Try simple keyword-based responses for common question types
365
- if any(word in question_lower for word in ['calculate', 'math', '+', '-', '*', '/', 'equals']):
366
- return f"This appears to be a mathematical question. Error occurred: {error}. Please verify the calculation manually."
367
-
368
- elif any(word in question_lower for word in ['when', 'date', 'year', 'time']):
369
- return f"This appears to be a date/time related question. Error occurred: {error}. Please search for current information."
370
-
371
- elif any(word in question_lower for word in ['who', 'what', 'where', 'how']):
372
- return f"This appears to be a factual question. Error occurred: {error}. Please search for current information."
373
-
374
- else:
375
- return f"I encountered an error while processing your question: {error}. Please try rephrasing your question."
376
-
377
- # --- Constants ---
378
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
379
 
380
  # --- Evaluation & Submission ---
381
  def run_and_submit_all(profile: gr.OAuthProfile | None):
382
- space_id = os.getenv("SPACE_ID")
383
- if profile:
384
- username = profile.username
385
- print(f"👤 User: {username}")
386
- else:
387
  return "Please login to Hugging Face.", None
388
 
389
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
390
- questions_url = f"{DEFAULT_API_URL}/questions"
391
- submit_url = f"{DEFAULT_API_URL}/submit"
392
-
393
  try:
394
- agent = ImprovedWebSearchAgent()
395
  except Exception as e:
396
- return f"Agent initialization error: {e}", None
397
 
398
  try:
399
- response = requests.get(questions_url, timeout=15)
400
- response.raise_for_status()
 
 
401
  questions = response.json()
402
  if not questions:
403
  return "No questions received.", None
404
-
405
- print(f"📝 Received {len(questions)} questions")
406
-
407
  except Exception as e:
408
- return f"Failed to fetch questions: {e}", None
409
 
410
- results_log = []
411
- answers_payload = []
412
-
413
- for i, item in enumerate(questions, 1):
414
  task_id = item.get("task_id")
415
  question = item.get("question")
416
 
417
  if not task_id or not question:
418
  continue
419
 
420
- print(f"\n📋 Processing question {i}/{len(questions)}: {task_id}")
421
-
422
  try:
423
  answer = agent(question)
424
-
425
- # Ensure answer is not empty
426
- if not answer or len(answer.strip()) < 2:
427
- answer = "Unable to determine answer from available information."
428
-
429
- results_log.append({
430
- "Task ID": task_id,
431
- "Question": question[:100] + "..." if len(question) > 100 else question,
432
- "Submitted Answer": answer[:200] + "..." if len(answer) > 200 else answer
433
- })
434
-
435
- answers_payload.append({
436
- "task_id": task_id,
437
  "submitted_answer": answer
438
  })
439
-
440
- print(f" Answer generated for {task_id}")
441
-
 
 
442
  except Exception as e:
443
- error_msg = f"Agent error: {str(e)[:100]}"
444
- print(f"❌ Error for {task_id}: {error_msg}")
445
-
446
- results_log.append({
447
- "Task ID": task_id,
448
- "Question": question[:100] + "..." if len(question) > 100 else question,
449
- "Submitted Answer": error_msg
450
  })
451
-
452
- answers_payload.append({
453
- "task_id": task_id,
454
- "submitted_answer": "Error processing question"
455
  })
456
 
457
- if not answers_payload:
458
- return "No answers were generated.", pd.DataFrame(results_log)
459
-
460
- print(f"\n🚀 Submitting {len(answers_payload)} answers...")
461
-
462
  try:
463
- response = requests.post(submit_url, json={
464
- "username": username.strip(),
465
- "agent_code": agent_code,
466
- "answers": answers_payload
467
- }, timeout=120) # Increased timeout
468
-
469
- response.raise_for_status()
470
- result = response.json()
471
-
472
- score = result.get('score', 0)
473
- correct_count = result.get('correct_count', 0)
474
- total_attempted = result.get('total_attempted', len(answers_payload))
475
-
476
- status = (
477
- f" Submission Successful!\n"
478
- f"User: {result.get('username')}\n"
479
- f"Score: {score}% ({correct_count}/{total_attempted} correct)\n"
480
- f"Message: {result.get('message', 'No message')}\n"
481
- f"Total questions processed: {len(questions)}"
482
  )
483
-
484
- print(f"🎯 Final Score: {score}%")
485
-
486
- return status, pd.DataFrame(results_log)
487
-
488
  except Exception as e:
489
- error_msg = f"Submission failed: {e}"
490
- print(error_msg)
491
- return error_msg, pd.DataFrame(results_log)
492
 
493
- # --- UI ---
494
- with gr.Blocks(title="Enhanced GAIA Agent") as demo:
495
- gr.Markdown("# 🤖 Enhanced GAIA Agent with Advanced Reasoning")
496
- gr.Markdown("""
497
- **Improvements in this version:**
498
- - 🧠 Enhanced multi-step reasoning capabilities
499
- - 🔍 Multiple specialized search tools
500
- - 🧮 Advanced calculator with better math support
501
- - 📅 Date and time calculation tools
502
- - 📝 Text analysis capabilities
503
- - ⚡ Better error handling and fallback mechanisms
504
- - 🎯 Optimized for GAIA benchmark performance
505
- """)
506
-
507
  gr.LoginButton()
 
 
 
508
 
509
- with gr.Row():
510
- run_btn = gr.Button("🚀 Run Enhanced Evaluation & Submit", variant="primary", scale=2)
511
-
512
- status_box = gr.Textbox(label="📊 Status & Results", lines=8, interactive=False)
513
- result_table = gr.DataFrame(label="📋 Agent Answers Log", interactive=False)
514
-
515
- run_btn.click(
516
- fn=run_and_submit_all,
517
- outputs=[status_box, result_table],
518
- show_progress=True
519
  )
520
 
521
  if __name__ == "__main__":
522
- demo.launch(debug=True, share=False)
 
5
  from smolagents import ToolCallingAgent, tool
6
  from duckduckgo_search import DDGS
7
  import math
 
8
  import re
 
9
  from datetime import datetime, timedelta
10
  import time
11
 
12
+ # --- Enhanced Tools with Proper Error Handling ---
13
  @tool
14
  def duck_search(query: str) -> str:
15
+ """Searches the web using DuckDuckGo.
 
16
 
17
  Args:
18
  query: The search query string.
19
 
20
  Returns:
21
+ A formatted string with search results.
22
  """
23
  try:
24
  with DDGS() as ddgs:
25
+ results = ddgs.text(query, max_results=3)
26
+ return "\n\n".join(
27
+ f"Title: {r['title']}\nContent: {r['body']}\nURL: {r['href']}"
28
+ for r in results
29
+ ) if results else "No results found."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  except Exception as e:
31
+ return f"Search error: {str(e)}"
32
 
33
  @tool
34
  def advanced_calculator(expression: str) -> str:
35
+ """Evaluates mathematical expressions.
 
36
 
37
  Args:
38
+ expression: The math expression to evaluate.
39
 
40
  Returns:
41
+ The result as a string.
42
  """
43
  try:
44
+ # Safe evaluation environment
45
+ safe_dict = {k: v for k, v in math.__dict__.items() if not k.startswith("__")}
46
+ safe_dict.update({
47
+ '__builtins__': None,
48
+ 'abs': abs,
49
+ 'round': round,
50
+ 'min': min,
51
+ 'max': max
52
+ })
53
+
54
+ # Handle percentage expressions
55
+ if '%' in expression:
56
+ expression = expression.replace('%', '/100')
57
+
58
+ result = eval(expression, {'__builtins__': None}, safe_dict)
 
 
 
 
 
 
 
 
 
 
59
  return str(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  except Exception as e:
61
+ return f"Calculation error: {str(e)}"
62
 
63
+ # --- Simplified Agent Class ---
64
+ class GAIAAgent:
65
  def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  try:
67
  self.agent = ToolCallingAgent(
68
+ name="GAIA_Agent",
69
+ description="Agent for GAIA benchmark tasks",
70
+ tools=[duck_search, advanced_calculator],
71
+ model="gpt-3.5-turbo", # Use this as default
72
+ max_iterations=5
73
  )
74
+ print("✅ Agent initialized successfully")
75
  except Exception as e:
76
+ print(f" Agent initialization failed: {str(e)}")
77
+ raise
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  def __call__(self, question: str) -> str:
80
+ """Process a question with proper error handling."""
 
 
 
 
 
 
 
 
 
 
81
  try:
82
+ # Simple preprocessing
83
+ question = question.strip()
 
 
 
 
 
 
 
84
 
85
+ # Run the agent
86
+ response = self.agent.run(question)
87
 
88
+ # Basic post-processing
89
+ if not response:
90
+ return "Could not generate an answer."
91
+
92
+ return str(response)[:1000] # Limit response length
 
 
93
 
94
  except Exception as e:
95
+ print(f"⚠️ Processing error: {str(e)}")
96
+ return f"Error processing question: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # --- Evaluation & Submission ---
99
  def run_and_submit_all(profile: gr.OAuthProfile | None):
100
+ if not profile:
 
 
 
 
101
  return "Please login to Hugging Face.", None
102
 
 
 
 
 
103
  try:
104
+ agent = GAIAAgent()
105
  except Exception as e:
106
+ return f"Agent initialization failed: {str(e)}", None
107
 
108
  try:
109
+ response = requests.get(
110
+ "https://agents-course-unit4-scoring.hf.space/questions",
111
+ timeout=30
112
+ )
113
  questions = response.json()
114
  if not questions:
115
  return "No questions received.", None
 
 
 
116
  except Exception as e:
117
+ return f"Failed to fetch questions: {str(e)}", None
118
 
119
+ results = []
120
+ answers = []
121
+
122
+ for item in questions[:20]: # Process first 20 questions for testing
123
  task_id = item.get("task_id")
124
  question = item.get("question")
125
 
126
  if not task_id or not question:
127
  continue
128
 
 
 
129
  try:
130
  answer = agent(question)
131
+ answers.append({
132
+ "task_id": task_id,
 
 
 
 
 
 
 
 
 
 
 
133
  "submitted_answer": answer
134
  })
135
+ results.append({
136
+ "Task ID": task_id,
137
+ "Question": question[:100],
138
+ "Answer": answer[:200]
139
+ })
140
  except Exception as e:
141
+ answers.append({
142
+ "task_id": task_id,
143
+ "submitted_answer": f"Error: {str(e)}"
 
 
 
 
144
  })
145
+ results.append({
146
+ "Task ID": task_id,
147
+ "Question": question[:100],
148
+ "Answer": f"Error: {str(e)}"
149
  })
150
 
 
 
 
 
 
151
  try:
152
+ response = requests.post(
153
+ "https://agents-course-unit4-scoring.hf.space/submit",
154
+ json={
155
+ "username": profile.username,
156
+ "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
157
+ "answers": answers
158
+ },
159
+ timeout=60
160
+ )
161
+ data = response.json()
162
+ return (
163
+ f"Submitted {len(answers)} answers\n"
164
+ f"Score: {data.get('score', 'N/A')}%\n"
165
+ f"Correct: {data.get('correct_count', 0)}/{data.get('total_attempted', 0)}\n"
166
+ f"Message: {data.get('message', '')}",
167
+ pd.DataFrame(results)
 
 
 
168
  )
 
 
 
 
 
169
  except Exception as e:
170
+ return f"Submission failed: {str(e)}", pd.DataFrame(results)
 
 
171
 
172
+ # --- Gradio Interface ---
173
+ with gr.Blocks() as demo:
174
+ gr.Markdown("# GAIA Agent")
 
 
 
 
 
 
 
 
 
 
 
175
  gr.LoginButton()
176
+ submit_btn = gr.Button("Run & Submit", variant="primary")
177
+ output = gr.Textbox(label="Results")
178
+ table = gr.DataFrame(label="Details")
179
 
180
+ submit_btn.click(
181
+ fn=run_and_submit_all,
182
+ outputs=[output, table]
 
 
 
 
 
 
 
183
  )
184
 
185
  if __name__ == "__main__":
186
+ demo.launch()