Commit 37c9a6b
1 Parent(s): a6c261a

Refactoring

- requirements.txt +4 -1
- src/agent/base_agent.py +56 -16
- src/agent/google_vertex_ai_agent.py +19 -0
- src/agent/open_ai_agent.py +18 -0
- src/core/evaluator.py +27 -4
- src/models/__init__.py +0 -0
- src/rest_clients/hs_evaluator_client.py +30 -0
- src/tools/extract_text_from_image.py +43 -0
requirements.txt
CHANGED

@@ -4,4 +4,7 @@ requests~=2.32.3
 smolagents~=1.13.0
 python-dotenv~=1.1.0
 pandas~=2.2.3
-
+openpyxl~=3.1.5
+litellm~=1.66.1
+easyocr~=1.7.2
+wikipedia-api
src/agent/base_agent.py
CHANGED

@@ -1,24 +1,64 @@
-import
-
+from abc import abstractmethod, ABC
+
+from smolagents import CodeAgent, Tool, DuckDuckGoSearchTool, WikipediaSearchTool
+
+from tools.extract_text_from_image import ExtractTextFromImage
 from tools.weater_info_tool import WeatherInfoTool
 
 
-class
-    def __init__(self):
-
-
-
-
-        self.
-
-            model=model,
-            add_base_tools=True,
-            planning_interval=3
-        )
-        print("Agent initialized.")
+class BaseAgent(ABC):
+    def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 10, use_all_custom_tools: bool = True):
+        self.model_name: str = model_name
+        self.planning_interval = planning_interval
+        self.max_steps = max_steps
+        self.use_all_custom_tools = use_all_custom_tools
+        self.tools: list[Tool] = self.init_tools(tools)
+        self.agent = self.init_agent()
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         fixed_answer = self.agent.run(question)
         print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
+        return fixed_answer
+
+    @abstractmethod
+    def get_model(self):
+        pass
+
+    def init_tools(self, tools: list[Tool] | None = None):
+        if tools is None:
+            tools = []
+        if self.use_all_custom_tools:
+            tools = [
+                ExtractTextFromImage(),
+                WeatherInfoTool(),
+                DuckDuckGoSearchTool(),
+                WikipediaSearchTool()
+            ]
+        return tools
+
+    def add_tool(self, tool: Tool):
+        self.tools.append(tool)
+
+    def init_agent(self):
+        agent = CodeAgent(
+            model=self.get_model(),
+            tools=[t for t in self.tools],
+            add_base_tools=True,
+            verbosity_level=1,
+            additional_authorized_imports=[
+                "pandas",
+                "numpy",
+                "datetime",
+                "json",
+                "re",
+                "math",
+                "os",
+                "requests",
+                "csv",
+                "urllib"],
+            planning_interval=self.planning_interval,
+            max_steps=self.max_steps
+        )
+        print("Agent initialized.")
+        return agent
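Note on the new tool wiring: init_tools replaces any tools passed to the constructor with the fixed custom set whenever use_all_custom_tools is True (the default), so caller-supplied tools only take effect when that flag is False. A minimal sketch under that assumption, using the OpenAiAgent subclass added later in this commit; ReverseTextTool is a hypothetical tool shown only to illustrate the Tool interface init_tools expects, not part of this commit:

    from smolagents import Tool

    from agent.open_ai_agent import OpenAiAgent  # concrete subclass added in this commit


    class ReverseTextTool(Tool):
        # Hypothetical example tool, not part of this commit.
        name = "reverse_text"
        description = "Reverses the characters of the given text."
        inputs = {"text": {"type": "string", "description": "The text to reverse."}}
        output_type = "string"

        def forward(self, text: str) -> str:
            return text[::-1]


    # With the default use_all_custom_tools=True this list would be discarded by init_tools,
    # so pass use_all_custom_tools=False to keep it. Requires OPENAI_API_KEY in the environment.
    agent = OpenAiAgent(tools=[ReverseTextTool()], use_all_custom_tools=False)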
src/agent/google_vertex_ai_agent.py
ADDED

@@ -0,0 +1,19 @@
+import os
+from smolagents import Tool, LiteLLMModel
+from agent.base_agent import BaseAgent
+
+
+class GoogleVertexAiAgent(BaseAgent):
+    def __init__(self, model_name: str = "gemini/gemini-2.0-flash-lite", tools: list[Tool] | None = None,
+                 use_all_custom_tools: bool = True):
+        super().__init__(model_name=model_name, tools=tools, use_all_custom_tools=use_all_custom_tools)
+        self.model_name: str = model_name
+        self.agent = self.init_agent()
+
+    def get_model(self):
+        model = LiteLLMModel(
+            model_id=self.model_name,
+            temperature=0.2,
+            api_key=os.getenv("GEMINI_API_KEY")
+        )
+        return model
src/agent/open_ai_agent.py
ADDED

@@ -0,0 +1,18 @@
+import os
+from smolagents import OpenAIServerModel, Tool
+from agent.base_agent import BaseAgent
+
+
+class OpenAiAgent(BaseAgent):
+    def __init__(self, model_name: str = "gpt-4o-mini", tools: list[Tool] | None = None, use_all_custom_tools: bool = True):
+        super().__init__(model_name=model_name, tools=tools, use_all_custom_tools=use_all_custom_tools)
+        self.model_name: str = model_name
+        self.agent = self.init_agent()
+
+    def get_model(self):
+        model = OpenAIServerModel(
+            model_id=self.model_name,
+            temperature=0.2,
+            api_key=os.getenv("OPENAI_API_KEY")
+        )
+        return model
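A short usage sketch for the new subclass, assuming OPENAI_API_KEY is set in the environment; the model id shown is just the default and the question text is illustrative:

    from agent.open_ai_agent import OpenAiAgent

    # OPENAI_API_KEY must be set in the environment before running this.
    agent = OpenAiAgent(model_name="gpt-4o-mini")
    answer = agent("What is the capital of France?")  # BaseAgent.__call__ -> CodeAgent.run
    print(answer)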
src/core/evaluator.py
CHANGED

@@ -1,8 +1,8 @@
 import os
 import pandas as pd
 
+from agent.open_ai_agent import OpenAiAgent
 from rest_clients.hs_evaluator_client import HsEvaluatorClient
-from src.agent.base_agent import BasicAgent
 
 
 class Evaluator:
@@ -10,8 +10,8 @@ class Evaluator:
     def __init__(self, profile):
         self.profile = profile
         self.username = profile.username if profile else None
-        self.space_id = os.getenv("SPACE_ID")
-        self.agent =
+        self.space_id = os.getenv("SPACE_ID", "tommaso1288/Final_Assignment_Template")
+        self.agent = OpenAiAgent()
         self.hs_evaluator_client: HsEvaluatorClient | None = None
 
     def run_and_submit(self):
@@ -31,16 +31,39 @@ class Evaluator:
     def _run_agent(self, questions):
         results_log = []
         answers_payload = []
+        print(f"Running agent on {len(questions)} questions...")
         for item in questions:
             task_id = item.get("task_id")
             question_text = item.get("question")
+
+            # ---------- Fetch any attached file ----------
+            try:
+                file_path = self.get_hs_evaluator_client().download_file_if_any(task_id)
+            except Exception as e:
+                file_path = None
+                print(f"[file fetch error] {task_id}: {e}")
+
+            # ---------- Build the prompt sent to the agent ----------
+            if file_path:
+                q_for_agent = (
+                    f"{question_text}\n\n"
+                    f"---\n"
+                    f"A file was downloaded for this task and saved locally at:\n"
+                    f"{file_path}\n"
+                    f"---\n\n"
+                )
+            else:
+                q_for_agent = question_text
+
             if not task_id or question_text is None:
+                print(f"Skipping item with missing task_id or question: {item}")
                 continue
             try:
-                submitted_answer = self.agent(
+                submitted_answer = self.agent(q_for_agent)
                 answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
                 results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
             except Exception as e:
+                print(f"Error running agent on task {task_id}: {e}")
                 results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
         return results_log, answers_payload
 
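For reference, a sketch of the data shapes _run_agent works with, inferred from the dict keys used above; the concrete values are illustrative:

    # Input items come from the questions endpoint; only these two keys are read.
    questions = [
        {"task_id": "abc123", "question": "How many studio albums did the artist release?"},
    ]

    # After _run_agent(questions):
    #   answers_payload -> [{"task_id": "abc123", "submitted_answer": "<agent answer>"}]
    #   results_log     -> [{"Task ID": "abc123", "Question": "...", "Submitted Answer": "<agent answer>"}]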
src/models/__init__.py
ADDED
File without changes
src/rest_clients/hs_evaluator_client.py
CHANGED

@@ -1,3 +1,7 @@
+import re
+import tempfile
+from pathlib import Path
+
 import pandas as pd
 import requests
 
@@ -18,6 +22,32 @@ class HsEvaluatorClient:
             print(f"Error fetching questions: {e}")
             return None
 
+    def download_file_if_any(self, task_id: str) -> str | None:
+        url = f"{self.base_url}/files/{task_id}"
+        try:
+            resp = requests.get(url, timeout=30)
+            if resp.status_code == 404:
+                return None
+            resp.raise_for_status()
+        except requests.exceptions.HTTPError as e:
+            raise e
+
+        # ▸ Save bytes to a named file inside the system temp dir.
+        # Try to keep the original extension from Content-Disposition if present.
+        cdisp = resp.headers.get("content-disposition", "")
+        filename = task_id  # default base name
+        if "filename=" in cdisp:
+            m = re.search(r'filename="([^"]+)"', cdisp)
+            if m:
+                filename = m.group(1)  # keep provided name
+
+        tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
+        tmp_dir.mkdir(exist_ok=True)
+        file_path = tmp_dir / filename
+        with open(file_path, "wb") as f:
+            f.write(resp.content)
+        return str(file_path)
+
     def submit_answers(self, answers_payload, results_log):
         agent_code = f"https://huggingface.co/spaces/{self.space_id}/tree/main"
         submission_data = {
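A minimal sketch of calling the new helper directly; the constructor arguments of HsEvaluatorClient are not shown in this diff, so the no-argument construction below is an assumption, and the task id is illustrative:

    from rest_clients.hs_evaluator_client import HsEvaluatorClient

    client = HsEvaluatorClient()  # assumed construction; see Evaluator.get_hs_evaluator_client()
    path = client.download_file_if_any("abc123")
    if path is None:
        print("No file attached to this task (server returned 404).")
    else:
        print(f"Attachment saved to {path}")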
src/tools/extract_text_from_image.py
ADDED

@@ -0,0 +1,43 @@
+import os
+import easyocr
+from smolagents import Tool
+
+
+class ExtractTextFromImage(Tool):
+    name = "extract_text_from_image"
+    description = "A tool for extracting text from an image using the EasyOCR library."
+    inputs = {
+        "image_path": {
+            "type": "string",
+            "description": "The file path to the image to be processed."
+        }
+    }
+    output_type = "string"
+
+    def forward(self, image_path: str) -> str:
+        """
+        Extract text from an image file using EasyOCR.
+
+        Args:
+            image_path (str): The path to the image file to be processed.
+
+        Returns:
+            str: The extracted text from the image or an error message.
+        """
+        try:
+            if not os.path.exists(image_path):
+                return f"Error: File '{image_path}' does not exist."
+
+            reader = easyocr.Reader(['en'], gpu=False)  # Use gpu=True for faster execution if available
+
+            results = reader.readtext(image_path, detail=1)
+
+            if not results:
+                return "No text detected in the image."
+            extracted_texts = [result[1] for result in results]  # Extract the text field from results
+            extracted_text = "\n".join(extracted_texts)
+            return f"Extracted text from image:\n\n{extracted_text}"
+        except ImportError:
+            return "Error: easyocr is not installed. Please install it with 'pip install easyocr'."
+        except Exception as e:
+            return f"Error extracting text from image: {str(e)}"
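A short sketch of exercising the new tool outside the agent, assuming easyocr and its dependencies are installed; the image path is illustrative:

    from tools.extract_text_from_image import ExtractTextFromImage

    tool = ExtractTextFromImage()
    # Any local PNG/JPG path works here; this one is illustrative.
    print(tool.forward("/tmp/gaia_files/sample_receipt.png"))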