tommaso1288 committed on
Commit
37c9a6b
·
1 Parent(s): a6c261a

Refactoring

Browse files
requirements.txt CHANGED
@@ -4,4 +4,7 @@ requests~=2.32.3
4
  smolagents~=1.13.0
5
  python-dotenv~=1.1.0
6
  pandas~=2.2.3
7
- litellm~=1.66.1
 
 
 
 
4
  smolagents~=1.13.0
5
  python-dotenv~=1.1.0
6
  pandas~=2.2.3
7
+ openpyxl~=3.1.5
8
+ litellm~=1.66.1
9
+ easyocr~=1.7.2
10
+ wikipedia-api
src/agent/base_agent.py CHANGED
@@ -1,24 +1,64 @@
1
- import os
2
- from smolagents import CodeAgent, LiteLLMModel, DuckDuckGoSearchTool
 
 
 
3
  from tools.weater_info_tool import WeatherInfoTool
4
 
5
 
6
- class BasicAgent:
7
- def __init__(self):
8
- model = LiteLLMModel(
9
- model_id="gemini/gemini-2.0-flash-lite",
10
- api_key=os.getenv("GEMINI_API_KEY")
11
- )
12
- self.agent = CodeAgent(
13
- tools=[WeatherInfoTool(), DuckDuckGoSearchTool()],
14
- model=model,
15
- add_base_tools=True,
16
- planning_interval=3
17
- )
18
- print("Agent initialized.")
19
 
20
  def __call__(self, question: str) -> str:
21
  print(f"Agent received question (first 50 chars): {question[:50]}...")
22
  fixed_answer = self.agent.run(question)
23
  print(f"Agent returning fixed answer: {fixed_answer}")
24
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import abstractmethod, ABC
2
+
3
+ from smolagents import CodeAgent, Tool, DuckDuckGoSearchTool, WikipediaSearchTool
4
+
5
+ from tools.extract_text_from_image import ExtractTextFromImage
6
  from tools.weater_info_tool import WeatherInfoTool
7
 
8
 
9
class BaseAgent(ABC):
    """Shared scaffolding for agents built on a smolagents ``CodeAgent``.

    Subclasses provide the language model by implementing :meth:`get_model`;
    this base class assembles the tool list, builds the ``CodeAgent`` and
    makes the instance callable with a question string.
    """

    def __init__(self, model_name: str, tools: list[Tool] | None = None, planning_interval: int = 3, max_steps: int = 10, use_all_custom_tools: bool = True):
        """Store the configuration, resolve the tool list and build the agent.

        Args:
            model_name: Identifier that subclasses pass to their model backend.
            tools: Extra tools supplied by the caller (optional).
            planning_interval: Forwarded to ``CodeAgent(planning_interval=...)``.
            max_steps: Forwarded to ``CodeAgent(max_steps=...)``.
            use_all_custom_tools: When true, the built-in tool set
                (OCR, weather, DuckDuckGo, Wikipedia) is included as well.
        """
        self.model_name: str = model_name
        self.planning_interval = planning_interval
        self.max_steps = max_steps
        self.use_all_custom_tools = use_all_custom_tools
        self.tools: list[Tool] = self.init_tools(tools)
        self.agent = self.init_agent()

    def __call__(self, question: str) -> str:
        """Run the wrapped agent on ``question`` and return its answer."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = self.agent.run(question)
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer

    @abstractmethod
    def get_model(self):
        """Return the model object handed to ``CodeAgent(model=...)``."""

    def init_tools(self, tools: list[Tool] | None = None):
        """Build the list of tools the agent is created with.

        Caller-supplied tools are always kept. When ``use_all_custom_tools``
        is set, the built-in tools are added after them; a built-in whose
        ``name`` matches a caller-supplied tool is skipped so the caller's
        version wins. (Previously the caller's tools were silently replaced
        by the built-in set.)

        Returns:
            A new list, so later ``add_tool`` calls never mutate the
            caller's own list.
        """
        selected = list(tools) if tools else []
        if self.use_all_custom_tools:
            taken = {tool.name for tool in selected}
            builtin_tools = (
                ExtractTextFromImage(),
                WeatherInfoTool(),
                DuckDuckGoSearchTool(),
                WikipediaSearchTool(),
            )
            selected.extend(tool for tool in builtin_tools if tool.name not in taken)
        return selected

    def add_tool(self, tool: Tool):
        """Register an additional tool and make it available to the agent.

        The ``CodeAgent`` receives a copy of the tool list when it is built,
        so appending alone would never reach an existing agent; it is
        therefore rebuilt once a tool is added after construction.
        """
        self.tools.append(tool)
        if getattr(self, "agent", None) is not None:
            self.agent = self.init_agent()

    def init_agent(self):
        """Create the ``CodeAgent`` from the current model, tools and limits."""
        agent = CodeAgent(
            model=self.get_model(),
            tools=list(self.tools),  # copy: the agent must not share our list
            add_base_tools=True,
            verbosity_level=1,
            # NOTE(review): "os", "requests" and "urllib" give agent-generated
            # code file-system and network access - confirm this is intended.
            additional_authorized_imports=[
                "pandas",
                "numpy",
                "datetime",
                "json",
                "re",
                "math",
                "os",
                "requests",
                "csv",
                "urllib",
            ],
            planning_interval=self.planning_interval,
            max_steps=self.max_steps,
        )
        print("Agent initialized.")
        return agent
src/agent/google_vertex_ai_agent.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import Tool, LiteLLMModel
3
+ from agent.base_agent import BaseAgent
4
+
5
+
6
class OpenAiAgent(BaseAgent):
    """Agent whose model is a ``LiteLLMModel`` (Gemini by default).

    NOTE(review): the class name matches the OpenAI variant although this
    module configures a Gemini model with ``GEMINI_API_KEY``; the name is
    kept so existing imports keep working - consider renaming it and
    leaving an alias.
    """

    def __init__(self, model_name: str = "gemini/gemini-2.0-flash-lite", tools: list[Tool] | None = None,
                 use_all_custom_tools: bool = True):
        """Configure the agent.

        ``BaseAgent.__init__`` already stores ``model_name`` and builds
        ``self.agent`` through ``init_agent()``, so nothing is repeated
        here; repeating it would construct the model and agent twice.
        """
        super().__init__(model_name=model_name, tools=tools, use_all_custom_tools=use_all_custom_tools)

    def get_model(self):
        """Return a ``LiteLLMModel`` for ``self.model_name``.

        The API key is read from the ``GEMINI_API_KEY`` environment variable
        (``None`` when unset).
        """
        return LiteLLMModel(
            model_id=self.model_name,
            temperature=0.2,
            api_key=os.getenv("GEMINI_API_KEY"),
        )
src/agent/open_ai_agent.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import OpenAIServerModel, Tool
3
+ from agent.base_agent import BaseAgent
4
+
5
+
6
class OpenAiAgent(BaseAgent):
    """Agent whose model is an ``OpenAIServerModel`` (``gpt-4o-mini`` by default)."""

    def __init__(self, model_name: str = "gpt-4o-mini", tools: list[Tool] | None = None, use_all_custom_tools: bool = True):
        """Configure the agent.

        ``BaseAgent.__init__`` already stores ``model_name`` and builds
        ``self.agent`` through ``init_agent()``, so nothing is repeated
        here; repeating it would construct the model and agent twice.
        """
        super().__init__(model_name=model_name, tools=tools, use_all_custom_tools=use_all_custom_tools)

    def get_model(self):
        """Return an ``OpenAIServerModel`` for ``self.model_name``.

        The API key is read from the ``OPENAI_API_KEY`` environment variable
        (``None`` when unset).
        """
        return OpenAIServerModel(
            model_id=self.model_name,
            temperature=0.2,
            api_key=os.getenv("OPENAI_API_KEY"),
        )
src/core/evaluator.py CHANGED
@@ -1,8 +1,8 @@
1
  import os
2
  import pandas as pd
3
 
 
4
  from rest_clients.hs_evaluator_client import HsEvaluatorClient
5
- from src.agent.base_agent import BasicAgent
6
 
7
 
8
  class Evaluator:
@@ -10,8 +10,8 @@ class Evaluator:
10
  def __init__(self, profile):
11
  self.profile = profile
12
  self.username = profile.username if profile else None
13
- self.space_id = os.getenv("SPACE_ID")
14
- self.agent = BasicAgent()
15
  self.hs_evaluator_client: HsEvaluatorClient | None = None
16
 
17
  def run_and_submit(self):
@@ -31,16 +31,39 @@ class Evaluator:
31
  def _run_agent(self, questions):
32
  results_log = []
33
  answers_payload = []
 
34
  for item in questions:
35
  task_id = item.get("task_id")
36
  question_text = item.get("question")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if not task_id or question_text is None:
 
38
  continue
39
  try:
40
- submitted_answer = self.agent(question_text)
41
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
42
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
43
  except Exception as e:
 
44
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
45
  return results_log, answers_payload
46
 
 
1
  import os
2
  import pandas as pd
3
 
4
+ from agent.open_ai_agent import OpenAiAgent
5
  from rest_clients.hs_evaluator_client import HsEvaluatorClient
 
6
 
7
 
8
  class Evaluator:
 
10
  def __init__(self, profile):
11
  self.profile = profile
12
  self.username = profile.username if profile else None
13
+ self.space_id = os.getenv("SPACE_ID", "tommaso1288/Final_Assignment_Template")
14
+ self.agent = OpenAiAgent()
15
  self.hs_evaluator_client: HsEvaluatorClient | None = None
16
 
17
  def run_and_submit(self):
 
31
  def _run_agent(self, questions):
32
  results_log = []
33
  answers_payload = []
34
+ print(f"Running agent on {len(questions)} questions...")
35
  for item in questions:
36
  task_id = item.get("task_id")
37
  question_text = item.get("question")
38
+
39
+ # ----------fetch any attached file ----------
40
+ try:
41
+ file_path = self.get_hs_evaluator_client().download_file_if_any(task_id)
42
+ except Exception as e:
43
+ file_path = None
44
+ print(f"[file fetch error] {task_id}: {e}")
45
+
46
+ # ---------- Build the prompt sent to the agent ----------
47
+ if file_path:
48
+ q_for_agent = (
49
+ f"{question_text}\n\n"
50
+ f"---\n"
51
+ f"A file was downloaded for this task and saved locally at:\n"
52
+ f"{file_path}\n"
53
+ f"---\n\n"
54
+ )
55
+ else:
56
+ q_for_agent = question_text
57
+
58
  if not task_id or question_text is None:
59
+ print(f"Skipping item with missing task_id or question: {item}")
60
  continue
61
  try:
62
+ submitted_answer = self.agent(q_for_agent)
63
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
64
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
65
  except Exception as e:
66
+ print(f"Error running agent on task {task_id}: {e}")
67
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
68
  return results_log, answers_payload
69
 
src/models/__init__.py ADDED
File without changes
src/rest_clients/hs_evaluator_client.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import pandas as pd
2
  import requests
3
 
@@ -18,6 +22,32 @@ class HsEvaluatorClient:
18
  print(f"Error fetching questions: {e}")
19
  return None
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def submit_answers(self, answers_payload, results_log):
22
  agent_code = f"https://huggingface.co/spaces/{self.space_id}/tree/main"
23
  submission_data = {
 
1
+ import re
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
  import pandas as pd
6
  import requests
7
 
 
22
  print(f"Error fetching questions: {e}")
23
  return None
24
 
25
def download_file_if_any(self, task_id: str) -> str | None:
    """Download the file attached to ``task_id``, if the server has one.

    Args:
        task_id: Task identifier, used in the URL and as the fallback
            file name.

    Returns:
        Path of the saved file inside ``<system temp dir>/gaia_files``,
        or ``None`` when the server answers 404.

    Raises:
        requests.exceptions.HTTPError: For any non-404 error status.
        requests.exceptions.RequestException: For connection/timeout
            failures raised by ``requests.get``.
    """
    url = f"{self.base_url}/files/{task_id}"
    resp = requests.get(url, timeout=30)
    if resp.status_code == 404:
        return None
    resp.raise_for_status()

    # Prefer the name announced in Content-Disposition (quoted or bare)
    # so the original extension is kept; otherwise fall back to the id.
    cdisp = resp.headers.get("content-disposition", "")
    candidates = [str(task_id)]
    m = re.search(r'filename=(?:"([^"]+)"|([^;\s]+))', cdisp)
    if m:
        candidates.insert(0, m.group(1) or m.group(2))

    # The name comes from the remote server: keep only its final path
    # component so values like "../../x" cannot escape the temp directory.
    filename = None
    for raw_name in candidates:
        base_name = Path(raw_name.strip().replace("\\", "/")).name
        if base_name not in ("", ".", ".."):
            filename = base_name
            break
    if filename is None:
        filename = "download"

    tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    file_path = tmp_dir / filename
    file_path.write_bytes(resp.content)
    return str(file_path)
50
+
51
  def submit_answers(self, answers_payload, results_log):
52
  agent_code = f"https://huggingface.co/spaces/{self.space_id}/tree/main"
53
  submission_data = {
src/tools/extract_text_from_image.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import easyocr
3
+ from smolagents import Tool
4
+
5
+
6
class ExtractTextFromImage(Tool):
    """smolagents tool that runs EasyOCR on an image file and returns its text."""

    name = "extract_text_from_image"
    description = "A tool for extracting text from an image using the EasyOCR library."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The file path to the image to be processed."
        }
    }
    output_type = "string"

    def _get_reader(self):
        """Return this tool's EasyOCR reader, creating it on first use.

        The reader is kept on the instance so repeated calls do not
        construct a new ``easyocr.Reader`` each time.
        """
        reader = getattr(self, "_reader", None)
        if reader is None:
            # English only, CPU; pass gpu=True for faster execution if available.
            reader = easyocr.Reader(['en'], gpu=False)
            self._reader = reader
        return reader

    def forward(self, image_path: str) -> str:
        """
        Extract text from an image file using EasyOCR.

        Args:
            image_path (str): The path to the image file to be processed.

        Returns:
            str: The extracted text (one detected fragment per line) or an
            error message; errors are returned as text rather than raised.
        """
        try:
            if not os.path.exists(image_path):
                return f"Error: File '{image_path}' does not exist."

            results = self._get_reader().readtext(image_path, detail=1)

            if not results:
                return "No text detected in the image."
            # The text is the second field of each result entry.
            extracted_text = "\n".join(result[1] for result in results)
            return f"Extracted text from image:\n\n{extracted_text}"
        except ImportError:
            return "Error: easyocr is not installed. Please install it with 'pip install easyocr'."
        except Exception as e:
            return f"Error extracting text from image: {str(e)}"