sirine1712 committed on
Commit
94d642e
·
verified ·
1 Parent(s): 8241e7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -201
app.py CHANGED
@@ -1,214 +1,154 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- import json
7
- import re
8
- from typing import Dict, List, Any, Optional
9
- import asyncio
10
- from datetime import datetime
11
- import tempfile
12
- import base64
13
- from io import BytesIO
14
- from PIL import Image
15
- import numpy as np
16
-
17
- # Additional imports for enhanced capabilities
18
- try:
19
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
20
- import torch
21
- except ImportError:
22
- print("Warning: transformers not available. Install with: pip install transformers torch")
23
-
24
- try:
25
- from sentence_transformers import SentenceTransformer
26
- except ImportError:
27
- print("Warning: sentence-transformers not available. Install with: pip install sentence-transformers")
28
-
29
- try:
30
- import wikipediaapi
31
- except ImportError:
32
- print("Warning: wikipedia-api not available. Install with: pip install wikipedia-api")
33
 
34
  # --- Constants ---
35
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
36
 
37
- class EnhancedGAIAAgent:
38
- """
39
- Enhanced agent for GAIA benchmark with multi-modal capabilities,
40
- web search, RAG, and multiple reasoning strategies.
41
- """
42
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def __init__(self):
44
- print("EnhancedGAIAAgent initializing...")
45
- self.setup_models()
46
- self.setup_tools()
47
- self.knowledge_base = {}
48
- print("EnhancedGAIAAgent initialized successfully.")
49
-
50
- def setup_models(self):
51
- """Initialize models for different tasks"""
 
 
 
52
  try:
53
- # Text generation model for reasoning
54
- self.text_model = None # Will lazy load when needed
55
-
56
- # Embedding model for RAG
57
- try:
58
- self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
59
- print("βœ… Embedding model loaded")
60
- except:
61
- self.embedder = None
62
- print("⚠️ Embedding model not available")
63
-
64
- # Vision model for image analysis
65
- try:
66
- self.vision_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
67
- print("βœ… Vision model loaded")
68
- except:
69
- self.vision_model = None
70
- print("⚠️ Vision model not available")
71
-
72
  except Exception as e:
73
- print(f"Model setup error: {e}")
74
-
75
- def setup_tools(self):
76
- """Initialize tools for web search and knowledge retrieval"""
77
- try:
78
- self.wiki = wikipediaapi.Wikipedia(
79
- language='en',
80
- extract_format=wikipediaapi.ExtractFormat.WIKI,
81
- user_agent='GAIA-Agent/1.0'
82
- )
83
- print("βœ… Wikipedia API initialized")
84
- except:
85
- self.wiki = None
86
- print("⚠️ Wikipedia API not available")
87
-
88
- def web_search(self, query: str, max_results: int = 3) -> List[Dict]:
89
- """
90
- Simulate web search using multiple sources
91
- """
92
- results = []
93
-
94
- # Wikipedia search
95
- if self.wiki:
96
- try:
97
- page = self.wiki.page(query)
98
- if page.exists():
99
- results.append({
100
- 'title': page.title,
101
- 'content': page.text[:1000],
102
- 'source': 'Wikipedia',
103
- 'url': page.fullurl
104
- })
105
- except:
106
- pass
107
-
108
- # Add more search sources here (DuckDuckGo, etc.)
109
- return results[:max_results]
110
-
111
- def extract_numbers_and_calculations(self, text: str) -> Dict:
112
- """Extract numbers and perform calculations from text"""
113
- numbers = re.findall(r'-?\d+\.?\d*', text)
114
- calculations = {
115
- 'numbers_found': [float(n) for n in numbers if n],
116
- 'sum': sum(float(n) for n in numbers if n),
117
- 'count': len(numbers)
118
- }
119
- return calculations
120
-
121
- def analyze_image(self, image_path: str) -> str:
122
- """Analyze image content"""
123
- if not self.vision_model:
124
- return "Image analysis not available"
125
-
126
  try:
127
- image = Image.open(image_path)
128
- result = self.vision_model(image)
129
- return result[0]['generated_text'] if result else "Could not analyze image"
 
 
 
 
 
 
 
130
  except Exception as e:
131
- return f"Image analysis error: {e}"
132
-
133
- def rag_retrieval(self, query: str, context: str) -> str:
134
- """Simple RAG-like retrieval and generation"""
135
- if not self.embedder:
136
- return context[:500] # Return truncated context
137
-
138
- try:
139
- # Split context into chunks
140
- chunks = [context[i:i+200] for i in range(0, len(context), 200)]
141
-
142
- # Find most relevant chunk
143
- query_embedding = self.embedder.encode([query])
144
- chunk_embeddings = self.embedder.encode(chunks)
145
-
146
- similarities = np.dot(query_embedding, chunk_embeddings.T)[0]
147
- best_chunk_idx = np.argmax(similarities)
148
-
149
- return chunks[best_chunk_idx]
150
- except:
151
- return context[:500]
152
-
153
- def mathematical_reasoning(self, question: str) -> str:
154
- """Handle mathematical questions"""
155
- # Extract mathematical expressions
156
- math_patterns = [
157
- r'(\d+(?:\.\d+)?)\s*[\+\-\*\/]\s*(\d+(?:\.\d+)?)',
158
- r'(\d+)\s*percent|(\d+)%',
159
- r'(\d+)\s*degrees?',
160
- ]
161
-
162
- for pattern in math_patterns:
163
- matches = re.findall(pattern, question)
164
- if matches:
165
- # Simple calculation handling
166
- try:
167
- nums = self.extract_numbers_and_calculations(question)
168
- if nums['numbers_found']:
169
- return f"Based on the numbers found: {nums['numbers_found']}, the sum is {nums['sum']}"
170
- except:
171
- pass
172
-
173
- return "Mathematical reasoning applied but no clear calculation found."
174
-
175
- def factual_qa(self, question: str) -> str:
176
- """Handle factual questions using web search"""
177
- search_results = self.web_search(question)
178
-
179
- if not search_results:
180
- return "I couldn't find relevant information to answer this question."
181
-
182
- # Combine search results
183
- combined_info = ""
184
- for result in search_results:
185
- combined_info += f"{result['content']}\n"
186
-
187
- # Use RAG to get most relevant information
188
- relevant_info = self.rag_retrieval(question, combined_info)
189
-
190
- return f"Based on available information: {relevant_info}"
191
-
192
- def multi_step_reasoning(self, question: str) -> str:
193
- """Handle complex multi-step questions"""
194
- steps = []
195
-
196
- # Step 1: Identify question type
197
- question_lower = question.lower()
198
-
199
- if any(word in question_lower for word in ['calculate', 'compute', 'math', 'number']):
200
- steps.append("Identified as mathematical question")
201
- result = self.mathematical_reasoning(question)
202
- elif any(word in question_lower for word in ['when', 'where', 'who', 'what', 'how']):
203
- steps.append("Identified as factual question")
204
- result = self.factual_qa(question)
205
- else:
206
- steps.append("Using general reasoning")
207
- result = self.general_reasoning(question)
208
-
209
- return result
210
-
211
- def general_reasoning(self, question: str) -> str:
212
- """General reasoning for questions that don't fit other categories"""
213
- # Try to extract key entities and concepts
214
- key
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from smolagents import ToolCallingAgent, tool
6
+ import duckduckgo_search
7
+ import math
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
+ # --- Tools ---
13
@tool
def duck_search(query: str) -> str:
    """Search the web with DuckDuckGo and return a short plain-text summary.

    Args:
        query: The search query to send to DuckDuckGo.

    Returns:
        Up to three results, one per line, formatted as "title: body";
        "No results found." when the search yields nothing; or
        "Search error: ..." when the search raises.
    """
    try:
        # The module-level `duckduckgo_search.ddg()` helper no longer exists in
        # current releases of the library; the `DDGS` class is its replacement.
        with duckduckgo_search.DDGS() as ddgs:
            # list() copes with releases where text() yields results lazily.
            results = list(ddgs.text(query, max_results=3))
        if not results:
            return "No results found."
        # .get() keeps one result with a missing field from discarding the rest.
        return "\n".join(
            f"{r.get('title', '')}: {r.get('body', '')}" for r in results
        )
    except Exception as e:
        # Best-effort tool: hand the failure back to the agent as text so it
        # can try something else instead of aborting the whole run.
        return f"Search error: {e}"
24
+
25
@tool
def calculator(expression: str) -> str:
    """Evaluate a basic math expression and return the result as text.

    Public names from Python's math module (for example sqrt, pi, log) can be
    used inside the expression.

    Args:
        expression: The math expression to evaluate, e.g. "2 * (3 + 4)" or "sqrt(16)".

    Returns:
        The result converted with str(), or "Calculation error: ..." when
        evaluation raises.
    """
    # Give eval a private copy of the math namespace. Passing math.__dict__
    # itself lets an expression such as "(pi := 3)" overwrite attributes of the
    # real math module for the rest of the process.
    math_names = {
        name: value for name, value in vars(math).items() if not name.startswith("_")
    }
    try:
        # NOTE(security): emptying __builtins__ does NOT make eval a sandbox --
        # attribute access on literals can still reach arbitrary objects. The
        # expression is model-generated and may be influenced by web content;
        # consider replacing this with an ast-based arithmetic evaluator.
        result = eval(expression, {"__builtins__": {}}, math_names)
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"
33
+
34
+ # --- Agent Definition ---
35
class WebSearchAgent:
    """Callable wrapper around a smolagents ``ToolCallingAgent``.

    The wrapped agent is given the ``duck_search`` and ``calculator`` tools.
    Instances are used like a function: ``answer = agent(question)``.
    """

    # Guidance sent along with every question. It is prepended to the task
    # rather than passed as a replacement system prompt: ToolCallingAgent does
    # not take a `system_prompt` keyword in current smolagents releases.
    # NOTE(review): confirm against the installed smolagents version.
    INSTRUCTIONS = (
        "You're a helpful agent tasked with answering general questions using "
        "reasoning and external tools if needed. Prioritize factual accuracy, "
        "logic, and concise answers."
    )

    def __init__(self):
        """Build the underlying agent with a default hosted-inference model.

        Raises:
            Exception: whatever smolagents raises if the model or the agent
                cannot be constructed.
        """
        # ToolCallingAgent requires a model. Imported locally so this class
        # does not depend on edits to the file's import block; the class name
        # differs between smolagents releases, hence the fallback.
        # NOTE(review): the default model presumably needs an HF token in the
        # environment -- confirm for the target Space.
        try:
            from smolagents import InferenceClientModel as _ModelClass
        except ImportError:
            from smolagents import HfApiModel as _ModelClass

        self.agent = ToolCallingAgent(
            tools=[duck_search, calculator],
            model=_ModelClass(),
            max_steps=5,  # the step cap keyword is `max_steps`, not `step_limit`
            name="GAIAWebToolAgent",
            description="An agent that answers questions using reasoning and tools like web search and calculator.",
        )
        print("✅ WebSearchAgent initialized.")

    def __call__(self, question: str) -> str:
        """Answer one question.

        Args:
            question: The question text to hand to the agent.

        Returns:
            The agent's final answer as a string, or "Error: ..." if the run
            raised.
        """
        print(f"🔍 Agent received: {question}")
        try:
            # run() may return a non-string final answer (e.g. a number);
            # normalise it so the declared return type holds.
            return str(self.agent.run(f"{self.INSTRUCTIONS}\n\n{question}"))
        except Exception as e:
            print(f"❌ Error: {e}")
            return f"Error: {e}"
53
+
54
+ # --- Main Evaluation Logic ---
55
def _answer_questions(agent, questions_data):
    """Run ``agent`` on each well-formed question item.

    Returns:
        A pair ``(answers_payload, results_log)``: the answers to submit and
        the per-question rows shown in the results table.
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        # Skip malformed entries instead of crashing on `.get`.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is logged for display but not submitted.
            error_msg = f"Agent error: {e}"
            print(error_msg)
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": error_msg,
            })
            continue
        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A pair ``(status_message, results)`` where ``results`` is a pandas
        DataFrame of the per-question log, or None when the run stopped before
        any question was processed.
    """
    if not profile:
        return "Please login to Hugging Face first.", None
    username = profile.username
    print(f"User logged in: {username}")

    # Link to this Space's code, sent along with the submission.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = WebSearchAgent()
    except Exception as e:
        return f"Agent init error: {e}", None

    try:
        print("📥 Fetching questions...")
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # Check the shape as well as emptiness: a dict or error payload would
    # otherwise fail later with an uncaught AttributeError.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None
    print(f"✅ Fetched {len(questions_data)} questions.")

    print("🚀 Running agent on questions...")
    answers_payload, results_log = _answer_questions(agent, questions_data)

    if not answers_payload:
        return "No answers to submit.", pd.DataFrame(results_log)

    print("📤 Submitting answers...")
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission error: {e}", pd.DataFrame(results_log)
136
+
137
+ # --- Gradio UI ---
138
# Build the page: instructions, a login button, a run button, and two outputs
# (status text and the per-question answer log).
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent with Web Search & Calculator")
    gr.Markdown("""
    1. Log in to Hugging Face.
    2. Click **Run Evaluation** to fetch, run, and submit.
    3. Your agent uses web search (DuckDuckGo) and math tools.
    """)
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5)
    results_table = gr.DataFrame(label="Answer Log")

    # No `inputs` are wired: run_and_submit_all's only parameter is the OAuth
    # profile. NOTE(review): this relies on Gradio supplying the profile from
    # the parameter's type annotation -- confirm for the Gradio version in use.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    print("🌍 Launching App...")
    demo.launch(debug=True, share=False)