altozachmo committed on
Commit
ca6fbc3
·
1 Parent(s): 968a67a

Attempt with wikipedia parsing tools

Browse files
agents/agent.py CHANGED
@@ -1,15 +1,11 @@
1
  from smolagents import (
2
  CodeAgent,
3
- DuckDuckGoSearchTool,
4
- WikipediaSearchTool,
5
  LiteLLMModel,
6
  Tool,
7
  )
8
- from tools.text_search import TextSearch
9
- from tools.text_splitter import text_splitter
10
- from tools.video_analyzer import WebVideoAnalyzerTool
11
  from typing import Callable
12
 
 
13
  class MyAgent:
14
  def __init__(
15
  self,
 
1
  from smolagents import (
2
  CodeAgent,
 
 
3
  LiteLLMModel,
4
  Tool,
5
  )
 
 
 
6
  from typing import Callable
7
 
8
+
9
  class MyAgent:
10
  def __init__(
11
  self,
agents/video_agent.py CHANGED
@@ -34,20 +34,30 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
34
  driver = helium.get_driver()
35
  current_step = memory_step.step_number
36
  if driver is not None:
37
- for previous_memory_step in agent.memory.steps: # Remove previous screenshots for lean processing
38
- if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
 
 
 
 
 
39
  previous_memory_step.observations_images = None
40
  png_bytes = driver.get_screenshot_as_png()
41
  image = Image.open(BytesIO(png_bytes))
42
  print(f"Captured a browser screenshot: {image.size} pixels")
43
- memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists
 
 
44
 
45
  # Update observations with current URL
46
  url_info = f"Current url: {driver.current_url}"
47
  memory_step.observations = (
48
- url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
 
 
49
  )
50
 
 
51
  video_agent = MyAgent(
52
  api_key=os.getenv("GEMINI_API_KEY"),
53
  temperature=0.0,
 
34
  driver = helium.get_driver()
35
  current_step = memory_step.step_number
36
  if driver is not None:
37
+ for (
38
+ previous_memory_step
39
+ ) in agent.memory.steps: # Remove previous screenshots for lean processing
40
+ if (
41
+ isinstance(previous_memory_step, ActionStep)
42
+ and previous_memory_step.step_number <= current_step - 2
43
+ ):
44
  previous_memory_step.observations_images = None
45
  png_bytes = driver.get_screenshot_as_png()
46
  image = Image.open(BytesIO(png_bytes))
47
  print(f"Captured a browser screenshot: {image.size} pixels")
48
+ memory_step.observations_images = [
49
+ image.copy()
50
+ ] # Create a copy to ensure it persists
51
 
52
  # Update observations with current URL
53
  url_info = f"Current url: {driver.current_url}"
54
  memory_step.observations = (
55
+ url_info
56
+ if memory_step.observations is None
57
+ else memory_step.observations + "\n" + url_info
58
  )
59
 
60
+
61
  video_agent = MyAgent(
62
  api_key=os.getenv("GEMINI_API_KEY"),
63
  temperature=0.0,
app.py CHANGED
@@ -5,10 +5,51 @@ import pandas as pd
5
  from agents.agent import MyAgent
6
  import time
7
  from tqdm import tqdm
 
 
 
 
 
 
 
 
 
 
8
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def run_and_submit_all(profile: gr.OAuthProfile | None):
13
  """
14
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -30,14 +71,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
30
 
31
  # 1. Instantiate Agent ( modify this part to create your agent)
32
  try:
33
- agent = MyAgent(
34
- provider="litellm",
35
- model_id="gemini/gemini-2.0-flash-lite",
36
- api_key=os.getenv("GEMINI_API_KEY"),
37
- planning_interval=3,
38
- num_ctx=8192,
39
- temperature=0.2,
40
- )
41
 
42
  except Exception as e:
43
  print(f"Error instantiating agent: {e}")
@@ -72,17 +106,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
72
  answers_payload = []
73
  print(f"Running agent on {len(questions_data)} questions...")
74
  for item in tqdm(
75
- questions_data[0:3],
76
  desc="Agent is answering questions...",
77
  total=len(questions_data),
78
  ):
79
  task_id = item.get("task_id")
80
  question_text = item.get("question")
 
 
81
  if not task_id or question_text is None:
82
  print(f"Skipping item with missing task_id or question: {item}")
83
  continue
84
  try:
85
- submitted_answer = agent(question_text)
86
  time.sleep(30) # to avoid rate limiting
87
  answers_payload.append(
88
  {"task_id": task_id, "submitted_answer": submitted_answer}
 
5
  from agents.agent import MyAgent
6
  import time
7
  from tqdm import tqdm
8
+ from prompts.default_prompt import generate_prompt
9
+ from smolagents import (
10
+ DuckDuckGoSearchTool,
11
+ VisitWebpageTool,
12
+ )
13
+ from tools.text_search import TextSearch
14
+ from tools.text_splitter import text_splitter
15
+ from tools.webpage_parser import WebpageParser
16
+ from tools.parse_wikipedia_table import WikipediaParser
17
+ from tools.open_files import OpenFilesTool
18
 
19
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
20
 
21
 
22
# Keyword arguments for MyAgent, unpacked as MyAgent(**myagent_args) below.
# NOTE(review): this dict is duplicated verbatim in run_local_agent.py —
# consider sharing a single definition.
myagent_args = {
    "provider": "litellm",                       # LLM backend selector
    "model_id": "gemini/gemini-2.0-flash-lite",  # model served via LiteLLM
    # "api_base": OLLAMA_API_BASE,
    "planning_interval": 3,  # presumably re-plan every 3 steps — see smolagents docs
    # Tool instances made available to the CodeAgent.
    "tools": [
        DuckDuckGoSearchTool(),
        WikipediaParser(),
        VisitWebpageTool(),
        TextSearch(),
        text_splitter,
        WebpageParser(),
        OpenFilesTool(),
    ],
    # Modules the agent's generated code is allowed to import.
    "additional_authorized_imports": [
        "pandas",
        "numpy",
        "datetime",
        "json",
        "re",
        "math",
        "os",
        "requests",
        "csv",
        "urllib",
    ],
    "num_ctx": 8192,     # context window size forwarded to the model backend
    "temperature": 0.2,  # low temperature for more deterministic answers
}
51
+
52
+
53
  def run_and_submit_all(profile: gr.OAuthProfile | None):
54
  """
55
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
71
 
72
  # 1. Instantiate Agent ( modify this part to create your agent)
73
  try:
74
+ agent = MyAgent(**myagent_args)
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
 
106
  answers_payload = []
107
  print(f"Running agent on {len(questions_data)} questions...")
108
  for item in tqdm(
109
+ questions_data,
110
  desc="Agent is answering questions...",
111
  total=len(questions_data),
112
  ):
113
  task_id = item.get("task_id")
114
  question_text = item.get("question")
115
+ file_name = item.get("file_name")
116
+ prompt = generate_prompt(question_text, file_name)
117
  if not task_id or question_text is None:
118
  print(f"Skipping item with missing task_id or question: {item}")
119
  continue
120
  try:
121
+ submitted_answer = agent(prompt)
122
  time.sleep(30) # to avoid rate limiting
123
  answers_payload.append(
124
  {"task_id": task_id, "submitted_answer": submitted_answer}
prompts/default_prompt.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_prompt(question_text, file_name):
    """
    Build the full instruction prompt sent to the answering agent.

    Args:
        question_text (str): The question to be answered.
        file_name (str | None): Name of the task's attached file, or a falsy
            value (None / "") when the task has no attachment.

    Returns:
        str: The generated prompt. The trailing `file_name` instruction is
        appended only when *file_name* is truthy, so the agent is never
        instructed to open a file literally named "None".
    """
    # Full prompt with answering instructions and GAIA formatting guidelines.
    full_prompt = f"""You are a highly precise answering agent.
When given a question:
- If necessary, perform a web search using the tool `DuckDuckGoSearchTool` to find possible sources of information.
- Use the `visit_webpage` tool to visit the webpage and extract the content in markdown format.
- If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
- Use the `WikipediaParser` tool to fetch and read the Wikipedia page when necessary.
- You just have the ability to read Wikipedia pages only.
- If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
- Comma separated lists MUST contain a single space after each comma.
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
- Only answer after you have gathered enough information by reading the actual page contents.
- Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
- Do not retry or execute anything else after calling `final_answer`.
- `final_answer` must wrap the exact printed value.
Provide ONLY the precise answer requested.
Do not include explanations, steps, reasoning, or additional text.
Be direct and specific. GAIA benchmark requires exact matching answers.
Example: if asked "What is the capital of France?", respond exactly:
Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
Code:
```py
print("Paris")
```<end_code>
Based on the above guidelines, answer the following question:
--begin of question--
{question_text}
--end of question--"""
    # Only mention the attachment when the task actually has one; otherwise
    # the agent would be told to pass file_name="None" to its tools.
    if file_name:
        full_prompt += f"""
If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
file_name: {file_name}"""
    return full_prompt
pyproject.toml CHANGED
@@ -5,13 +5,17 @@ description = "Add your description here"
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
 
8
  "ffmpeg>=1.4",
9
  "gradio[oauth]>=5.27.0",
10
  "helium>=5.1.1",
11
  "litellm==1.67.1",
 
12
  "numpy>=2.2.5",
13
  "openai>=1.76.0",
 
14
  "opencv-python>=4.11.0.86",
 
15
  "pandas>=2.2.3",
16
  "pillow>=11.2.1",
17
  "python-dotenv>=1.1.0",
 
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
+ "beautifulsoup4>=4.13.4",
9
  "ffmpeg>=1.4",
10
  "gradio[oauth]>=5.27.0",
11
  "helium>=5.1.1",
12
  "litellm==1.67.1",
13
+ "markdownify>=1.1.0",
14
  "numpy>=2.2.5",
15
  "openai>=1.76.0",
16
+ "openai-whisper>=20240930",
17
  "opencv-python>=4.11.0.86",
18
+ "openpyxl>=3.1.5",
19
  "pandas>=2.2.3",
20
  "pillow>=11.2.1",
21
  "python-dotenv>=1.1.0",
run_local_agent.py CHANGED
@@ -1,5 +1,17 @@
1
  from agents.agent import MyAgent
2
  from utils import run_agent
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  import os
5
  import json
@@ -13,12 +25,35 @@ OLLAMA_API_BASE: str = os.getenv("OLLAMA_API_BASE", default="http://localhost:11
13
  OLLAMA_API_KEY: str | None = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
14
  OLLAMA_NUM_CTX: int = int(os.getenv("OLLAMA_NUM_CTX", default=8192))
15
 
 
16
  myagent_args = {
17
  "provider": "litellm",
18
  "model_id": "gemini/gemini-2.0-flash-lite",
19
  # "api_base": OLLAMA_API_BASE,
20
  "planning_interval": 3,
21
- "num_ctx": OLLAMA_NUM_CTX,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  }
23
 
24
  print(f"Using args: {myagent_args}")
@@ -29,6 +64,11 @@ if __name__ == "__main__":
29
  with open(QUESTIONS_FILEPATH, "r") as f:
30
  questions = json.load(f)
31
 
32
- answers = run_agent(agent, [questions[1]])
 
 
 
 
 
33
  print("Answers:", answers)
34
  print("Finished running the agent.")
 
1
  from agents.agent import MyAgent
2
  from utils import run_agent
3
+ from smolagents import (
4
+ DuckDuckGoSearchTool,
5
+ # WikipediaSearchTool,
6
+ VisitWebpageTool,
7
+ )
8
+ from tools.text_search import TextSearch
9
+ from tools.text_splitter import text_splitter
10
+ from tools.webpage_parser import WebpageParser
11
+ from tools.parse_wikipedia_table import WikipediaParser
12
+ from tools.open_files import OpenFilesTool
13
+ from prompts.default_prompt import generate_prompt
14
+
15
 
16
  import os
17
  import json
 
25
  OLLAMA_API_KEY: str | None = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
26
  OLLAMA_NUM_CTX: int = int(os.getenv("OLLAMA_NUM_CTX", default=8192))
27
 
28
+
29
  myagent_args = {
30
  "provider": "litellm",
31
  "model_id": "gemini/gemini-2.0-flash-lite",
32
  # "api_base": OLLAMA_API_BASE,
33
  "planning_interval": 3,
34
+ "tools": [
35
+ DuckDuckGoSearchTool(),
36
+ WikipediaParser(),
37
+ VisitWebpageTool(),
38
+ TextSearch(),
39
+ text_splitter,
40
+ WebpageParser(),
41
+ OpenFilesTool(),
42
+ ],
43
+ "additional_authorized_imports": [
44
+ "pandas",
45
+ "numpy",
46
+ "datetime",
47
+ "json",
48
+ "re",
49
+ "math",
50
+ "os",
51
+ "requests",
52
+ "csv",
53
+ "urllib",
54
+ ],
55
+ "num_ctx": 8192,
56
+ "temperature": 0.2,
57
  }
58
 
59
  print(f"Using args: {myagent_args}")
 
64
  with open(QUESTIONS_FILEPATH, "r") as f:
65
  questions = json.load(f)
66
 
67
    # Build a prompt for the first question in the local fixture file.
    question = questions[0]
    question_text = question.get("question")
    file_name = question.get("file_name")
    prompt = generate_prompt(question_text, file_name)

    # NOTE(review): `prompt` is built above but never used — run_agent is
    # still invoked with the raw question dict. Presumably run_agent should
    # receive the generated prompt; verify against utils.run_agent's
    # signature before changing.
    answers = run_agent(agent, [questions[0]])
73
  print("Answers:", answers)
74
  print("Finished running the agent.")
tools/__init__.py CHANGED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from markdownify import markdownify
4
+ from requests.exceptions import RequestException
5
+ from smolagents import tool
6
+
7
+
8
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    try:
        # Send a GET request to the URL. A timeout is essential here: without
        # one, a single unresponsive host would hang the agent forever.
        # requests.Timeout is a RequestException subclass, so a timed-out
        # request is reported through the error-string path below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Convert the HTML content to Markdown
        markdown_content = markdownify(response.text).strip()

        # Remove multiple line breaks (3+ newlines collapse to one blank line)
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        return markdown_content

    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"
tools/open_files.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import os
3
+ import json
4
+ import csv
5
+ import openpyxl
6
+ import whisper
7
+ import requests
8
+
9
+
10
class OpenFilesTool(Tool):
    """Tool that downloads a task file on demand and returns its content as
    text. Supports plain text, JSON, CSV, XLSX, and MP3 (transcribed with
    Whisper)."""

    name = "open_files_tool"
    description = (
        "This tool opens files and returns their content as a string. "
        "It can handle text, CSV, JSON, XLSX, and MP3 file types."
    )
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The path to the file to be opened.",
        },
        "file_type": {
            "type": "string",
            "description": "The type of the file (text, csv, json, xlsx, mp3). Default is 'text'.",
            "nullable": True,
        },
    }
    output_type = "string"

    def download_file(self, file_name: str) -> None:
        """Fetch *file_name* from the scoring server unless it already exists
        locally.

        The server addresses files by the name without its extension — hence
        the ``split('.')[0]`` in the URL.
        """
        if not os.path.exists(file_name):
            url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name.split('.')[0]}"
            # Timeout so a dead server cannot hang the agent, and
            # raise_for_status so an HTTP error page is never silently saved
            # to disk as if it were the requested file's content.
            r = requests.get(url, timeout=60)
            r.raise_for_status()
            with open(file_name, "wb") as f:
                f.write(r.content)

    def open_file_as_text(self, file_name: str, filetype: str = "txt") -> str:
        """
        Opens a file and returns its content as readable text.
        Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).
        Args:
            file_name (str): The path or name of the file.
            filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Defaults to 'txt'.
        Returns:
            str: The content of the file as text, or transcribed speech if 'mp3'.
        """
        try:
            # Download inside the try block so a failed download is reported
            # via the error-string contract instead of crashing the agent step.
            self.download_file(file_name)

            if filetype == "txt":
                with open(file_name, "r", encoding="utf-8") as f:
                    return f.read()

            elif filetype == "json":
                with open(file_name, "r", encoding="utf-8") as f:
                    data = json.load(f)
                return json.dumps(data, indent=2)

            elif filetype == "csv":
                with open(file_name, "r", encoding="utf-8") as f:
                    reader = csv.reader(f)
                    rows = list(reader)
                return "\n".join([", ".join(row) for row in rows])

            elif filetype == "xlsx":
                wb = openpyxl.load_workbook(file_name, data_only=True)
                sheet = wb.active
                content = []
                for row in sheet.iter_rows(values_only=True):
                    content.append(
                        ", ".join(str(cell) if cell is not None else "" for cell in row)
                    )
                return "\n".join(content)

            elif filetype == "mp3":
                # Speech-to-text via Whisper; model load happens per call.
                w = whisper.load_model("base")
                res = w.transcribe(file_name)
                return res["text"]

            else:
                return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."

        except FileNotFoundError:
            return f"File '{file_name}' not found."
        except Exception as e:
            return f"Error opening file '{file_name}': {str(e)}"

    def forward(self, file_path: str, file_type: str = "text") -> str:
        """
        Opens a file and returns its content as a string.
        Args:
            file_path (str): The path to the file to be opened.
            file_type (str): The type of the file (text, csv, json, xlsx, mp3). Default is 'text'.
        Returns:
            str: The content of the file as a string.
        """
        # BUG FIX: the public input advertises "text" (and is nullable), but
        # open_file_as_text dispatches on "txt" — previously the default call
        # always fell through to the "Unsupported filetype" branch.
        filetype = "txt" if file_type in (None, "text") else file_type
        return self.open_file_as_text(file_path, filetype)
tools/parse_wikipedia_table.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import requests
3
+ from bs4 import BeautifulSoup, Tag
4
+
5
+
6
class WikipediaParser(Tool):
    """Tool that fetches a Wikipedia page and flattens its main content
    (headings, paragraphs, lists, and tables) into plain readable text."""

    name: str = "wikipedia_parser_tool"
    description: str = (
        "This tool parse a Wikipedia page into a clean, readable text format."
    )
    inputs: dict[str, dict[str, str]] = {
        "url": {
            "type": "string",
            "description": "The Wikipedia page url.",
        }
    }
    output_type: str = "string"

    def get_wikipedia_page(self, url: str) -> str:
        """
        Fetches a Wikipedia page and converts its main content to plain text.
        Args:
            url (str): The URL of the Wikipedia page.
        Returns:
            str: The flattened page content, or "Content not found." when the
            main content div is missing.
        """
        # A browser-like User-Agent avoids being rejected as a bot.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"  # AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
        }
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        content_div = soup.find("div", id="mw-content-text")
        if not content_div:
            return "Content not found."

        elements: list[str] = []
        h_tags: list[str] = [f"h{i}" for i in range(1, 6)]
        extra_tags: list[str] = ["p", "ul", "ol"]
        # BUG FIX: "table" must be part of the search list, otherwise the
        # table branch below can never fire and tables are silently dropped.
        html_tags: list[str] = h_tags + extra_tags + ["table"]

        for elem in content_div.find_all(html_tags):
            if elem.name in h_tags:
                elements.append("\n\n" + elem.get_text(strip=True) + "\n")
            elif elem.name == "table":
                elements.append(self.parse_wikipedia_table(elem))
            elif elem.name in extra_tags:
                elements.append(elem.get_text(strip=True))

        return "\n".join(elements)

    def parse_wikipedia_table(self, table: Tag) -> str:
        """
        Parses a Wikipedia table into a clean, readable text format.
        Args:
            table (Tag): BeautifulSoup Tag for the table.
        Returns:
            str: Formatted table as readable text.
        """
        # BUG FIX: this instance method was missing `self`, so the call
        # self.parse_wikipedia_table(elem) would have raised a TypeError.
        rows = []
        headers = []

        # Try to get headers
        thead = table.find("thead")
        if thead:
            for th in thead.find_all("th"):
                header_text = th.get_text(separator=" ", strip=True)
                headers.append(header_text)
            if headers:
                rows.append(" | ".join(headers))

        # Parse table body rows
        tbody = table.find("tbody")
        if not tbody:
            tbody = table  # fallback: some tables have no tbody explicitly

        for tr in tbody.find_all("tr"):
            cells = tr.find_all(["th", "td"])
            cell_texts = []
            for cell in cells:
                # Clean references like [7], [note 1], etc.
                for sup in cell.find_all("sup", class_="reference"):
                    sup.decompose()

                text = cell.get_text(separator=" ", strip=True)
                cell_texts.append(text)

            if cell_texts:
                row_text = " | ".join(cell_texts)
                rows.append(row_text)

        return "\n".join(rows)

    def forward(self, url: str) -> str:
        """
        Parses the Wikipedia page and returns the content as a string.
        Args:
            url (str): The URL of the Wikipedia page.
        Returns:
            str: The parsed content of the page.
        """
        page_text = self.get_wikipedia_page(url)
        return page_text
tools/video_analyzer.py CHANGED
@@ -16,6 +16,7 @@ from selenium.webdriver.support.ui import WebDriverWait
16
  from selenium.webdriver.support import expected_conditions as EC
17
  import helium
18
 
 
19
  class WebVideoAnalyzerTool(Tool):
20
  name = "web_video_analyzer"
21
  description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
@@ -31,11 +32,13 @@ class WebVideoAnalyzerTool(Tool):
31
  "duration": {
32
  "type": "integer",
33
  "description": "How many seconds of the video to analyze (default: 30)",
 
34
  },
35
  "interval": {
36
  "type": "integer",
37
  "description": "How often to take screenshots (in seconds, default: 1)",
38
- }
 
39
  }
40
  output_type = "string"
41
 
@@ -43,9 +46,9 @@ class WebVideoAnalyzerTool(Tool):
43
  """Initialize the browser with appropriate settings."""
44
  if self.driver is not None:
45
  return self.driver
46
-
47
  print("Setting up browser...")
48
-
49
  # Configure Chrome options
50
  chrome_options = webdriver.ChromeOptions()
51
  chrome_options.add_argument("--force-device-scale-factor=1")
@@ -53,7 +56,7 @@ class WebVideoAnalyzerTool(Tool):
53
  chrome_options.add_argument("--disable-pdf-viewer")
54
  chrome_options.add_argument("--window-position=0,0")
55
  chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
56
-
57
  # Initialize the driver
58
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
59
  return self.driver
@@ -63,10 +66,10 @@ class WebVideoAnalyzerTool(Tool):
63
  try:
64
  print(f"Navigating to {url}...")
65
  helium.go_to(url)
66
-
67
  # Wait for page to load
68
  time.sleep(3)
69
-
70
  # Handle YouTube-specific interactions
71
  if "youtube.com" in url:
72
  try:
@@ -75,7 +78,7 @@ class WebVideoAnalyzerTool(Tool):
75
  helium.click("Accept all")
76
  elif helium.Button("I agree").exists():
77
  helium.click("I agree")
78
-
79
  # Click on the video to ensure it's playing
80
  try:
81
  # Find the video player element
@@ -83,21 +86,23 @@ class WebVideoAnalyzerTool(Tool):
83
  EC.presence_of_element_located((By.TAG_NAME, "video"))
84
  )
85
  video_element.click()
86
-
87
  # Ensure the video is playing by trying to click the play button if visible
88
  try:
89
- play_button = self.driver.find_element(By.CLASS_NAME, "ytp-play-button")
 
 
90
  if "Play" in play_button.get_attribute("aria-label"):
91
  play_button.click()
92
  except:
93
  pass
94
-
95
  except:
96
  print("Could not locate video element to click")
97
-
98
  except Exception as e:
99
  print(f"Error during YouTube setup: {str(e)}")
100
-
101
  # General approach - try to find and click on any video element
102
  else:
103
  try:
@@ -107,11 +112,11 @@ class WebVideoAnalyzerTool(Tool):
107
  video_elements[0].click()
108
  except Exception as e:
109
  print(f"Could not find or click video element: {str(e)}")
110
-
111
  # Allow video to start
112
  time.sleep(2)
113
  return True
114
-
115
  except Exception as e:
116
  print(f"Error navigating to {url}: {str(e)}")
117
  return False
@@ -121,13 +126,15 @@ class WebVideoAnalyzerTool(Tool):
121
  try:
122
  # Try pressing Escape key to close general popups
123
  webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
124
-
125
  # YouTube-specific: try to close any visible dialog or popup
126
  if "youtube.com" in self.driver.current_url:
127
  # Try to find and click close buttons on popups
128
  try:
129
- close_buttons = self.driver.find_elements(By.CSS_SELECTOR,
130
- "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button")
 
 
131
  for button in close_buttons:
132
  button.click()
133
  except:
@@ -143,96 +150,106 @@ class WebVideoAnalyzerTool(Tool):
143
  def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
144
  """Count objects of the specified label in a screenshot."""
145
  detector = pipeline("object-detection", model="facebook/detr-resnet-50")
146
-
147
  try:
148
  # Run detection on the image
149
  results = detector(image)
150
-
151
  # Count objects matching the label
152
- object_count = sum(1 for result in results if label.lower() in result["label"].lower())
153
-
 
 
154
  # Debug: print detected classes
155
  detected_classes = [result["label"] for result in results]
156
  if detected_classes:
157
  print(f"Detected classes: {', '.join(detected_classes)}")
158
-
159
  return object_count
160
-
161
  except Exception as e:
162
  print(f"Error detecting objects in screenshot: {str(e)}")
163
  return 0
164
 
165
- def _capture_video_frames(self, duration: int = 30, interval: int = 1, label: str = "") -> List[Dict]:
 
 
166
  """Capture frames from the video at regular intervals."""
167
  results = []
168
-
169
- print(f"Starting frame capture for {duration} seconds with {interval} second intervals...")
 
 
170
  temp_dir = tempfile.mkdtemp()
171
-
172
  for seconds_elapsed in range(0, duration, interval):
173
  # Take screenshot
174
  try:
175
  print(f"Capturing frame at {seconds_elapsed} seconds...")
176
  screenshot = self._take_screenshot()
177
-
178
  # Save screenshot for debugging (optional)
179
  screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
180
  screenshot.save(screenshot_path)
181
-
182
  # Analyze screenshot
183
  object_count = self._analyze_screenshot(screenshot, label)
184
-
185
  # Store results
186
- results.append({
187
- "time": seconds_elapsed,
188
- "object_count": object_count,
189
- "screenshot_path": screenshot_path
190
- })
191
-
 
 
192
  # Wait for next interval
193
  if seconds_elapsed + interval < duration:
194
  time.sleep(interval)
195
-
196
  except Exception as e:
197
  print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")
198
-
199
  return results
200
 
201
- def forward(self, url: str, label: str, duration: int = 30, interval: int = 1) -> str:
 
 
202
  """
203
  Analyzes a video on a webpage by taking screenshots and counting objects.
204
-
205
  Args:
206
  url (str): The URL of the webpage containing the video.
207
  label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
208
  duration (int): How many seconds of the video to analyze.
209
  interval (int): How often to take screenshots (in seconds).
210
-
211
  Returns:
212
  str: A detailed report of object counts over time.
213
  """
214
  try:
215
  # Setup the browser
216
  self._setup_browser()
217
-
218
  # Navigate to the video
219
  if not self._navigate_to_video(url):
220
  return f"Error: Could not navigate to or play the video at {url}"
221
-
222
  # Close any popups or overlays
223
  self._close_popups()
224
-
225
  # Capture and analyze frames
226
  frame_results = self._capture_video_frames(duration, interval, label)
227
-
228
  # Calculate summary statistics
229
  if not frame_results:
230
  return f"Error: No frames were successfully captured and analyzed"
231
-
232
  total_objects = sum(result["object_count"] for result in frame_results)
233
  avg_objects = total_objects / len(frame_results)
234
  max_objects = max(frame_results, key=lambda x: x["object_count"])
235
-
236
  # Generate a report
237
  report = [
238
  f"# {label.title()} Count Analysis for Video",
@@ -245,22 +262,24 @@ class WebVideoAnalyzerTool(Tool):
245
  f"Average {label}s per screenshot: {avg_objects:.2f}",
246
  f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
247
  "",
248
- "## Time-based Analysis"
249
  ]
250
-
251
  # Add frame-by-frame details
252
  for result in frame_results:
253
- report.append(f"Time {result['time']} seconds: {result['object_count']} {label}s")
254
-
 
 
255
  # Clean up
256
  try:
257
  helium.kill_browser()
258
  self.driver = None
259
  except:
260
  print("Warning: Could not properly close the browser")
261
-
262
  return "\n".join(report)
263
-
264
  except Exception as e:
265
  # Ensure browser is closed on error
266
  try:
@@ -269,6 +288,5 @@ class WebVideoAnalyzerTool(Tool):
269
  self.driver = None
270
  except:
271
  pass
272
-
273
  return f"Error analyzing video: {str(e)}"
274
-
 
16
  from selenium.webdriver.support import expected_conditions as EC
17
  import helium
18
 
19
+
20
  class WebVideoAnalyzerTool(Tool):
21
  name = "web_video_analyzer"
22
  description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
 
32
  "duration": {
33
  "type": "integer",
34
  "description": "How many seconds of the video to analyze (default: 30)",
35
+ "nullable": True,
36
  },
37
  "interval": {
38
  "type": "integer",
39
  "description": "How often to take screenshots (in seconds, default: 1)",
40
+ "nullable": True,
41
+ },
42
  }
43
  output_type = "string"
44
 
 
46
  """Initialize the browser with appropriate settings."""
47
  if self.driver is not None:
48
  return self.driver
49
+
50
  print("Setting up browser...")
51
+
52
  # Configure Chrome options
53
  chrome_options = webdriver.ChromeOptions()
54
  chrome_options.add_argument("--force-device-scale-factor=1")
 
56
  chrome_options.add_argument("--disable-pdf-viewer")
57
  chrome_options.add_argument("--window-position=0,0")
58
  chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
59
+
60
  # Initialize the driver
61
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
62
  return self.driver
 
66
  try:
67
  print(f"Navigating to {url}...")
68
  helium.go_to(url)
69
+
70
  # Wait for page to load
71
  time.sleep(3)
72
+
73
  # Handle YouTube-specific interactions
74
  if "youtube.com" in url:
75
  try:
 
78
  helium.click("Accept all")
79
  elif helium.Button("I agree").exists():
80
  helium.click("I agree")
81
+
82
  # Click on the video to ensure it's playing
83
  try:
84
  # Find the video player element
 
86
  EC.presence_of_element_located((By.TAG_NAME, "video"))
87
  )
88
  video_element.click()
89
+
90
  # Ensure the video is playing by trying to click the play button if visible
91
  try:
92
+ play_button = self.driver.find_element(
93
+ By.CLASS_NAME, "ytp-play-button"
94
+ )
95
  if "Play" in play_button.get_attribute("aria-label"):
96
  play_button.click()
97
  except:
98
  pass
99
+
100
  except:
101
  print("Could not locate video element to click")
102
+
103
  except Exception as e:
104
  print(f"Error during YouTube setup: {str(e)}")
105
+
106
  # General approach - try to find and click on any video element
107
  else:
108
  try:
 
112
  video_elements[0].click()
113
  except Exception as e:
114
  print(f"Could not find or click video element: {str(e)}")
115
+
116
  # Allow video to start
117
  time.sleep(2)
118
  return True
119
+
120
  except Exception as e:
121
  print(f"Error navigating to {url}: {str(e)}")
122
  return False
 
126
  try:
127
  # Try pressing Escape key to close general popups
128
  webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
129
+
130
  # YouTube-specific: try to close any visible dialog or popup
131
  if "youtube.com" in self.driver.current_url:
132
  # Try to find and click close buttons on popups
133
  try:
134
+ close_buttons = self.driver.find_elements(
135
+ By.CSS_SELECTOR,
136
+ "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button",
137
+ )
138
  for button in close_buttons:
139
  button.click()
140
  except:
 
150
  def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
151
  """Count objects of the specified label in a screenshot."""
152
  detector = pipeline("object-detection", model="facebook/detr-resnet-50")
153
+
154
  try:
155
  # Run detection on the image
156
  results = detector(image)
157
+
158
  # Count objects matching the label
159
+ object_count = sum(
160
+ 1 for result in results if label.lower() in result["label"].lower()
161
+ )
162
+
163
  # Debug: print detected classes
164
  detected_classes = [result["label"] for result in results]
165
  if detected_classes:
166
  print(f"Detected classes: {', '.join(detected_classes)}")
167
+
168
  return object_count
169
+
170
  except Exception as e:
171
  print(f"Error detecting objects in screenshot: {str(e)}")
172
  return 0
173
 
174
+ def _capture_video_frames(
175
+ self, duration: int = 30, interval: int = 1, label: str = ""
176
+ ) -> List[Dict]:
177
  """Capture frames from the video at regular intervals."""
178
  results = []
179
+
180
+ print(
181
+ f"Starting frame capture for {duration} seconds with {interval} second intervals..."
182
+ )
183
  temp_dir = tempfile.mkdtemp()
184
+
185
  for seconds_elapsed in range(0, duration, interval):
186
  # Take screenshot
187
  try:
188
  print(f"Capturing frame at {seconds_elapsed} seconds...")
189
  screenshot = self._take_screenshot()
190
+
191
  # Save screenshot for debugging (optional)
192
  screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
193
  screenshot.save(screenshot_path)
194
+
195
  # Analyze screenshot
196
  object_count = self._analyze_screenshot(screenshot, label)
197
+
198
  # Store results
199
+ results.append(
200
+ {
201
+ "time": seconds_elapsed,
202
+ "object_count": object_count,
203
+ "screenshot_path": screenshot_path,
204
+ }
205
+ )
206
+
207
  # Wait for next interval
208
  if seconds_elapsed + interval < duration:
209
  time.sleep(interval)
210
+
211
  except Exception as e:
212
  print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")
213
+
214
  return results
215
 
216
+ def forward(
217
+ self, url: str, label: str, duration: int = 30, interval: int = 1
218
+ ) -> str:
219
  """
220
  Analyzes a video on a webpage by taking screenshots and counting objects.
221
+
222
  Args:
223
  url (str): The URL of the webpage containing the video.
224
  label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
225
  duration (int): How many seconds of the video to analyze.
226
  interval (int): How often to take screenshots (in seconds).
227
+
228
  Returns:
229
  str: A detailed report of object counts over time.
230
  """
231
  try:
232
  # Setup the browser
233
  self._setup_browser()
234
+
235
  # Navigate to the video
236
  if not self._navigate_to_video(url):
237
  return f"Error: Could not navigate to or play the video at {url}"
238
+
239
  # Close any popups or overlays
240
  self._close_popups()
241
+
242
  # Capture and analyze frames
243
  frame_results = self._capture_video_frames(duration, interval, label)
244
+
245
  # Calculate summary statistics
246
  if not frame_results:
247
  return f"Error: No frames were successfully captured and analyzed"
248
+
249
  total_objects = sum(result["object_count"] for result in frame_results)
250
  avg_objects = total_objects / len(frame_results)
251
  max_objects = max(frame_results, key=lambda x: x["object_count"])
252
+
253
  # Generate a report
254
  report = [
255
  f"# {label.title()} Count Analysis for Video",
 
262
  f"Average {label}s per screenshot: {avg_objects:.2f}",
263
  f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
264
  "",
265
+ "## Time-based Analysis",
266
  ]
267
+
268
  # Add frame-by-frame details
269
  for result in frame_results:
270
+ report.append(
271
+ f"Time {result['time']} seconds: {result['object_count']} {label}s"
272
+ )
273
+
274
  # Clean up
275
  try:
276
  helium.kill_browser()
277
  self.driver = None
278
  except:
279
  print("Warning: Could not properly close the browser")
280
+
281
  return "\n".join(report)
282
+
283
  except Exception as e:
284
  # Ensure browser is closed on error
285
  try:
 
288
  self.driver = None
289
  except:
290
  pass
291
+
292
  return f"Error analyzing video: {str(e)}"
 
tools/web_utils.py CHANGED
@@ -5,6 +5,7 @@ from selenium.webdriver.common.by import By
5
 
6
  driver = None
7
 
 
8
  @tool
9
  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
10
  """
@@ -16,19 +17,23 @@ def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
16
  if driver:
17
  elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
18
  if nth_result > len(elements):
19
- raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
 
 
20
  result = f"Found {len(elements)} matches for '{text}'."
21
  elem = elements[nth_result - 1]
22
  driver.execute_script("arguments[0].scrollIntoView(true);", elem)
23
  result += f"Focused on element {nth_result} of {len(elements)}"
24
  return result
25
 
 
26
  @tool
27
  def go_back() -> None:
28
  """Goes back to previous page."""
29
  if driver:
30
  driver.back()
31
 
 
32
  @tool
33
  def close_popups() -> str:
34
  """
 
5
 
6
  driver = None
7
 
8
+
9
  @tool
10
  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
11
  """
 
17
  if driver:
18
  elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
19
  if nth_result > len(elements):
20
+ raise Exception(
21
+ f"Match n°{nth_result} not found (only {len(elements)} matches found)"
22
+ )
23
  result = f"Found {len(elements)} matches for '{text}'."
24
  elem = elements[nth_result - 1]
25
  driver.execute_script("arguments[0].scrollIntoView(true);", elem)
26
  result += f"Focused on element {nth_result} of {len(elements)}"
27
  return result
28
 
29
+
30
  @tool
31
  def go_back() -> None:
32
  """Goes back to previous page."""
33
  if driver:
34
  driver.back()
35
 
36
+
37
  @tool
38
  def close_popups() -> str:
39
  """
tools/webpage_parser.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ from smolagents import Tool
3
+
4
+
5
+ class WebpageParser(Tool):
6
+ name: str = "webpage_parser_tool"
7
+ description: str = (
8
+ "This tool parses elements from HTML to make them easily searchable."
9
+ )
10
+ inputs: dict[str, dict[str, str]] = {
11
+ "html_string": {
12
+ "type": "string",
13
+ "description": "The HTML content as a string.",
14
+ },
15
+ }
16
+ output_type: str = "array"
17
+
18
+ def forward(self, html_string: str) -> list[str]:
19
+ """
20
+ Parses the HTML string and returns all elements as an array.
21
+ """
22
+ # Create a BeautifulSoup object
23
+ soup = BeautifulSoup(html_string, "html.parser")
24
+
25
+ # Extract all elements as strings
26
+ elements = [str(element) for element in soup.find_all()]
27
+
28
+ return elements
utils/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
  import requests
2
  from smolagents import CodeAgent
3
  from tqdm import tqdm
 
4
 
5
  DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
6
 
@@ -50,12 +51,15 @@ def run_agent(agent: CodeAgent, questions: list[dict]) -> list[str]:
50
  for question in tqdm(questions, desc="Running agent"):
51
  task_id = question.get("task_id")
52
  question_text = question.get("question")
 
 
 
53
  if not task_id or question_text is None:
54
  print(f"Skipping item with missing task_id or question: {question}")
55
  continue
56
 
57
  try:
58
- answer = agent(question_text)
59
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
60
  results_log.append(
61
  {
 
1
  import requests
2
  from smolagents import CodeAgent
3
  from tqdm import tqdm
4
+ from prompts.default_prompt import generate_prompt
5
 
6
  DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
7
 
 
51
  for question in tqdm(questions, desc="Running agent"):
52
  task_id = question.get("task_id")
53
  question_text = question.get("question")
54
+ file_name = question.get("file_name")
55
+ prompt = generate_prompt(question_text, file_name)
56
+
57
  if not task_id or question_text is None:
58
  print(f"Skipping item with missing task_id or question: {question}")
59
  continue
60
 
61
  try:
62
+ answer = agent(prompt)
63
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
64
  results_log.append(
65
  {
uv.lock CHANGED
@@ -342,6 +342,15 @@ wheels = [
342
  { url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
343
  ]
344
 
 
 
 
 
 
 
 
 
 
345
  [[package]]
346
  name = "fastapi"
347
  version = "0.115.12"
@@ -544,13 +553,17 @@ name = "hf-agents-gaia-agent"
544
  version = "0.1.0"
545
  source = { virtual = "." }
546
  dependencies = [
 
547
  { name = "ffmpeg" },
548
  { name = "gradio", extra = ["oauth"] },
549
  { name = "helium" },
550
  { name = "litellm" },
 
551
  { name = "numpy" },
552
  { name = "openai" },
 
553
  { name = "opencv-python" },
 
554
  { name = "pandas" },
555
  { name = "pillow" },
556
  { name = "python-dotenv" },
@@ -567,13 +580,17 @@ dependencies = [
567
 
568
  [package.metadata]
569
  requires-dist = [
 
570
  { name = "ffmpeg", specifier = ">=1.4" },
571
  { name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
572
  { name = "helium", specifier = ">=5.1.1" },
573
  { name = "litellm", specifier = "==1.67.1" },
 
574
  { name = "numpy", specifier = ">=2.2.5" },
575
  { name = "openai", specifier = ">=1.76.0" },
 
576
  { name = "opencv-python", specifier = ">=4.11.0.86" },
 
577
  { name = "pandas", specifier = ">=2.2.3" },
578
  { name = "pillow", specifier = ">=11.2.1" },
579
  { name = "python-dotenv", specifier = ">=1.1.0" },
@@ -760,6 +777,24 @@ wheels = [
760
  { url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
761
  ]
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  [[package]]
764
  name = "lxml"
765
  version = "5.4.0"
@@ -874,6 +909,15 @@ wheels = [
874
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
875
  ]
876
 
 
 
 
 
 
 
 
 
 
877
  [[package]]
878
  name = "mpmath"
879
  version = "1.3.0"
@@ -952,6 +996,28 @@ wheels = [
952
  { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
953
  ]
954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955
  [[package]]
956
  name = "numpy"
957
  version = "2.2.5"
@@ -1142,6 +1208,21 @@ wheels = [
1142
  { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
1143
  ]
1144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
  [[package]]
1146
  name = "opencv-python"
1147
  version = "4.11.0.86"
@@ -1159,6 +1240,18 @@ wheels = [
1159
  { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
1160
  ]
1161
 
 
 
 
 
 
 
 
 
 
 
 
 
1162
  [[package]]
1163
  name = "orjson"
1164
  version = "3.10.16"
 
342
  { url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
343
  ]
344
 
345
+ [[package]]
346
+ name = "et-xmlfile"
347
+ version = "2.0.0"
348
+ source = { registry = "https://pypi.org/simple" }
349
+ sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
350
+ wheels = [
351
+ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
352
+ ]
353
+
354
  [[package]]
355
  name = "fastapi"
356
  version = "0.115.12"
 
553
  version = "0.1.0"
554
  source = { virtual = "." }
555
  dependencies = [
556
+ { name = "beautifulsoup4" },
557
  { name = "ffmpeg" },
558
  { name = "gradio", extra = ["oauth"] },
559
  { name = "helium" },
560
  { name = "litellm" },
561
+ { name = "markdownify" },
562
  { name = "numpy" },
563
  { name = "openai" },
564
+ { name = "openai-whisper" },
565
  { name = "opencv-python" },
566
+ { name = "openpyxl" },
567
  { name = "pandas" },
568
  { name = "pillow" },
569
  { name = "python-dotenv" },
 
580
 
581
  [package.metadata]
582
  requires-dist = [
583
+ { name = "beautifulsoup4", specifier = ">=4.13.4" },
584
  { name = "ffmpeg", specifier = ">=1.4" },
585
  { name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
586
  { name = "helium", specifier = ">=5.1.1" },
587
  { name = "litellm", specifier = "==1.67.1" },
588
+ { name = "markdownify", specifier = ">=1.1.0" },
589
  { name = "numpy", specifier = ">=2.2.5" },
590
  { name = "openai", specifier = ">=1.76.0" },
591
+ { name = "openai-whisper", specifier = ">=20240930" },
592
  { name = "opencv-python", specifier = ">=4.11.0.86" },
593
+ { name = "openpyxl", specifier = ">=3.1.5" },
594
  { name = "pandas", specifier = ">=2.2.3" },
595
  { name = "pillow", specifier = ">=11.2.1" },
596
  { name = "python-dotenv", specifier = ">=1.1.0" },
 
777
  { url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
778
  ]
779
 
780
+ [[package]]
781
+ name = "llvmlite"
782
+ version = "0.44.0"
783
+ source = { registry = "https://pypi.org/simple" }
784
+ sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880 }
785
+ wheels = [
786
+ { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297 },
787
+ { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105 },
788
+ { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901 },
789
+ { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247 },
790
+ { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380 },
791
+ { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306 },
792
+ { url = "https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090 },
793
+ { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904 },
794
+ { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245 },
795
+ { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193 },
796
+ ]
797
+
798
  [[package]]
799
  name = "lxml"
800
  version = "5.4.0"
 
909
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
910
  ]
911
 
912
+ [[package]]
913
+ name = "more-itertools"
914
+ version = "10.7.0"
915
+ source = { registry = "https://pypi.org/simple" }
916
+ sdist = { url = "https://files.pythonhosted.org/packages/ce/a0/834b0cebabbfc7e311f30b46c8188790a37f89fc8d756660346fe5abfd09/more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3", size = 127671 }
917
+ wheels = [
918
+ { url = "https://files.pythonhosted.org/packages/2b/9f/7ba6f94fc1e9ac3d2b853fdff3035fb2fa5afbed898c4a72b8a020610594/more_itertools-10.7.0-py3-none-any.whl", hash = "sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e", size = 65278 },
919
+ ]
920
+
921
  [[package]]
922
  name = "mpmath"
923
  version = "1.3.0"
 
996
  { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
997
  ]
998
 
999
+ [[package]]
1000
+ name = "numba"
1001
+ version = "0.61.2"
1002
+ source = { registry = "https://pypi.org/simple" }
1003
+ dependencies = [
1004
+ { name = "llvmlite" },
1005
+ { name = "numpy" },
1006
+ ]
1007
+ sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615 }
1008
+ wheels = [
1009
+ { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626 },
1010
+ { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287 },
1011
+ { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928 },
1012
+ { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115 },
1013
+ { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929 },
1014
+ { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785 },
1015
+ { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289 },
1016
+ { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918 },
1017
+ { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056 },
1018
+ { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846 },
1019
+ ]
1020
+
1021
  [[package]]
1022
  name = "numpy"
1023
  version = "2.2.5"
 
1208
  { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
1209
  ]
1210
 
1211
+ [[package]]
1212
+ name = "openai-whisper"
1213
+ version = "20240930"
1214
+ source = { registry = "https://pypi.org/simple" }
1215
+ dependencies = [
1216
+ { name = "more-itertools" },
1217
+ { name = "numba" },
1218
+ { name = "numpy" },
1219
+ { name = "tiktoken" },
1220
+ { name = "torch" },
1221
+ { name = "tqdm" },
1222
+ { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or sys_platform == 'linux2'" },
1223
+ ]
1224
+ sdist = { url = "https://files.pythonhosted.org/packages/f5/77/952ca71515f81919bd8a6a4a3f89a27b09e73880cebf90957eda8f2f8545/openai-whisper-20240930.tar.gz", hash = "sha256:b7178e9c1615576807a300024f4daa6353f7e1a815dac5e38c33f1ef055dd2d2", size = 800544 }
1225
+
1226
  [[package]]
1227
  name = "opencv-python"
1228
  version = "4.11.0.86"
 
1240
  { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
1241
  ]
1242
 
1243
+ [[package]]
1244
+ name = "openpyxl"
1245
+ version = "3.1.5"
1246
+ source = { registry = "https://pypi.org/simple" }
1247
+ dependencies = [
1248
+ { name = "et-xmlfile" },
1249
+ ]
1250
+ sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
1251
+ wheels = [
1252
+ { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
1253
+ ]
1254
+
1255
  [[package]]
1256
  name = "orjson"
1257
  version = "3.10.16"