altozachmo committed on
Commit
ca6fbc3
·
1 Parent(s): 968a67a

Attempt with wikipedia parsing tools

Browse files
agents/agent.py CHANGED
@@ -1,15 +1,11 @@
1
  from smolagents import (
2
  CodeAgent,
3
- DuckDuckGoSearchTool,
4
- WikipediaSearchTool,
5
  LiteLLMModel,
6
  Tool,
7
  )
8
- from tools.text_search import TextSearch
9
- from tools.text_splitter import text_splitter
10
- from tools.video_analyzer import WebVideoAnalyzerTool
11
  from typing import Callable
12
 
 
13
  class MyAgent:
14
  def __init__(
15
  self,
 
1
  from smolagents import (
2
  CodeAgent,
 
 
3
  LiteLLMModel,
4
  Tool,
5
  )
 
 
 
6
  from typing import Callable
7
 
8
+
9
  class MyAgent:
10
  def __init__(
11
  self,
agents/video_agent.py CHANGED
@@ -34,20 +34,30 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
34
  driver = helium.get_driver()
35
  current_step = memory_step.step_number
36
  if driver is not None:
37
- for previous_memory_step in agent.memory.steps: # Remove previous screenshots for lean processing
38
- if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
 
 
 
 
 
39
  previous_memory_step.observations_images = None
40
  png_bytes = driver.get_screenshot_as_png()
41
  image = Image.open(BytesIO(png_bytes))
42
  print(f"Captured a browser screenshot: {image.size} pixels")
43
- memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists
 
 
44
 
45
  # Update observations with current URL
46
  url_info = f"Current url: {driver.current_url}"
47
  memory_step.observations = (
48
- url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
 
 
49
  )
50
 
 
51
  video_agent = MyAgent(
52
  api_key=os.getenv("GEMINI_API_KEY"),
53
  temperature=0.0,
 
34
  driver = helium.get_driver()
35
  current_step = memory_step.step_number
36
  if driver is not None:
37
+ for (
38
+ previous_memory_step
39
+ ) in agent.memory.steps: # Remove previous screenshots for lean processing
40
+ if (
41
+ isinstance(previous_memory_step, ActionStep)
42
+ and previous_memory_step.step_number <= current_step - 2
43
+ ):
44
  previous_memory_step.observations_images = None
45
  png_bytes = driver.get_screenshot_as_png()
46
  image = Image.open(BytesIO(png_bytes))
47
  print(f"Captured a browser screenshot: {image.size} pixels")
48
+ memory_step.observations_images = [
49
+ image.copy()
50
+ ] # Create a copy to ensure it persists
51
 
52
  # Update observations with current URL
53
  url_info = f"Current url: {driver.current_url}"
54
  memory_step.observations = (
55
+ url_info
56
+ if memory_step.observations is None
57
+ else memory_step.observations + "\n" + url_info
58
  )
59
 
60
+
61
  video_agent = MyAgent(
62
  api_key=os.getenv("GEMINI_API_KEY"),
63
  temperature=0.0,
app.py CHANGED
@@ -5,10 +5,51 @@ import pandas as pd
5
  from agents.agent import MyAgent
6
  import time
7
  from tqdm import tqdm
 
 
 
 
 
 
 
 
 
 
8
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def run_and_submit_all(profile: gr.OAuthProfile | None):
13
  """
14
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -30,14 +71,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
30
 
31
  # 1. Instantiate Agent ( modify this part to create your agent)
32
  try:
33
- agent = MyAgent(
34
- provider="litellm",
35
- model_id="gemini/gemini-2.0-flash-lite",
36
- api_key=os.getenv("GEMINI_API_KEY"),
37
- planning_interval=3,
38
- num_ctx=8192,
39
- temperature=0.2,
40
- )
41
 
42
  except Exception as e:
43
  print(f"Error instantiating agent: {e}")
@@ -72,17 +106,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
72
  answers_payload = []
73
  print(f"Running agent on {len(questions_data)} questions...")
74
  for item in tqdm(
75
- questions_data[0:3],
76
  desc="Agent is answering questions...",
77
  total=len(questions_data),
78
  ):
79
  task_id = item.get("task_id")
80
  question_text = item.get("question")
 
 
81
  if not task_id or question_text is None:
82
  print(f"Skipping item with missing task_id or question: {item}")
83
  continue
84
  try:
85
- submitted_answer = agent(question_text)
86
  time.sleep(30) # to avoid rate limiting
87
  answers_payload.append(
88
  {"task_id": task_id, "submitted_answer": submitted_answer}
 
5
  from agents.agent import MyAgent
6
  import time
7
  from tqdm import tqdm
8
+ from prompts.default_prompt import generate_prompt
9
+ from smolagents import (
10
+ DuckDuckGoSearchTool,
11
+ VisitWebpageTool,
12
+ )
13
+ from tools.text_search import TextSearch
14
+ from tools.text_splitter import text_splitter
15
+ from tools.webpage_parser import WebpageParser
16
+ from tools.parse_wikipedia_table import WikipediaParser
17
+ from tools.open_files import OpenFilesTool
18
 
19
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
20
 
21
 
22
# Keyword arguments for MyAgent, unpacked as MyAgent(**myagent_args) below.
# NOTE(review): this dict is duplicated verbatim in run_local_agent.py —
# consider sharing a single definition.
myagent_args = {
    "provider": "litellm",                       # LLM backend selector
    "model_id": "gemini/gemini-2.0-flash-lite",  # model served via LiteLLM
    # "api_base": OLLAMA_API_BASE,
    "planning_interval": 3,  # presumably re-plan every 3 steps — see smolagents docs
    # Tool instances made available to the CodeAgent.
    "tools": [
        DuckDuckGoSearchTool(),
        WikipediaParser(),
        VisitWebpageTool(),
        TextSearch(),
        text_splitter,
        WebpageParser(),
        OpenFilesTool(),
    ],
    # Modules the agent's generated code is allowed to import.
    "additional_authorized_imports": [
        "pandas",
        "numpy",
        "datetime",
        "json",
        "re",
        "math",
        "os",
        "requests",
        "csv",
        "urllib",
    ],
    "num_ctx": 8192,     # context window size forwarded to the model backend
    "temperature": 0.2,  # low temperature for more deterministic answers
}
51
+
52
+
53
  def run_and_submit_all(profile: gr.OAuthProfile | None):
54
  """
55
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
71
 
72
  # 1. Instantiate Agent ( modify this part to create your agent)
73
  try:
74
+ agent = MyAgent(**myagent_args)
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
 
106
  answers_payload = []
107
  print(f"Running agent on {len(questions_data)} questions...")
108
  for item in tqdm(
109
+ questions_data,
110
  desc="Agent is answering questions...",
111
  total=len(questions_data),
112
  ):
113
  task_id = item.get("task_id")
114
  question_text = item.get("question")
115
+ file_name = item.get("file_name")
116
+ prompt = generate_prompt(question_text, file_name)
117
  if not task_id or question_text is None:
118
  print(f"Skipping item with missing task_id or question: {item}")
119
  continue
120
  try:
121
+ submitted_answer = agent(prompt)
122
  time.sleep(30) # to avoid rate limiting
123
  answers_payload.append(
124
  {"task_id": task_id, "submitted_answer": submitted_answer}
prompts/default_prompt.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_prompt(question_text, file_name):
    """
    Build the full instruction prompt sent to the answering agent.

    Args:
        question_text (str): The question to be answered.
        file_name (str | None): Name of the task's attached file, or a falsy
            value (None / "") when the task has no attachment.

    Returns:
        str: The generated prompt. The trailing `file_name` instruction is
        appended only when *file_name* is truthy, so the agent is never
        instructed to open a file literally named "None".
    """
    # Full prompt with answering instructions and GAIA formatting guidelines.
    full_prompt = f"""You are a highly precise answering agent.
When given a question:
- If necessary, perform a web search using the tool `DuckDuckGoSearchTool` to find possible sources of information.
- Use the `visit_webpage` tool to visit the webpage and extract the content in markdown format.
- If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
- Use the `WikipediaParser` tool to fetch and read the Wikipedia page when necessary.
- You just have the ability to read Wikipedia pages only.
- If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
- Comma separated lists MUST contain a single space after each comma.
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
- Only answer after you have gathered enough information by reading the actual page contents.
- Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
- Do not retry or execute anything else after calling `final_answer`.
- `final_answer` must wrap the exact printed value.
Provide ONLY the precise answer requested.
Do not include explanations, steps, reasoning, or additional text.
Be direct and specific. GAIA benchmark requires exact matching answers.
Example: if asked "What is the capital of France?", respond exactly:
Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
Code:
```py
print("Paris")
```<end_code>
Based on the above guidelines, answer the following question:
--begin of question--
{question_text}
--end of question--"""
    # Only mention the attachment when the task actually has one; otherwise
    # the agent would be told to pass file_name="None" to its tools.
    if file_name:
        full_prompt += f"""
If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
file_name: {file_name}"""
    return full_prompt
pyproject.toml CHANGED
@@ -5,13 +5,17 @@ description = "Add your description here"
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
 
8
  "ffmpeg>=1.4",
9
  "gradio[oauth]>=5.27.0",
10
  "helium>=5.1.1",
11
  "litellm==1.67.1",
 
12
  "numpy>=2.2.5",
13
  "openai>=1.76.0",
 
14
  "opencv-python>=4.11.0.86",
 
15
  "pandas>=2.2.3",
16
  "pillow>=11.2.1",
17
  "python-dotenv>=1.1.0",
 
5
  readme = "README.md"
6
  requires-python = ">=3.12"
7
  dependencies = [
8
+ "beautifulsoup4>=4.13.4",
9
  "ffmpeg>=1.4",
10
  "gradio[oauth]>=5.27.0",
11
  "helium>=5.1.1",
12
  "litellm==1.67.1",
13
+ "markdownify>=1.1.0",
14
  "numpy>=2.2.5",
15
  "openai>=1.76.0",
16
+ "openai-whisper>=20240930",
17
  "opencv-python>=4.11.0.86",
18
+ "openpyxl>=3.1.5",
19
  "pandas>=2.2.3",
20
  "pillow>=11.2.1",
21
  "python-dotenv>=1.1.0",
run_local_agent.py CHANGED
@@ -1,5 +1,17 @@
1
  from agents.agent import MyAgent
2
  from utils import run_agent
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  import os
5
  import json
@@ -13,12 +25,35 @@ OLLAMA_API_BASE: str = os.getenv("OLLAMA_API_BASE", default="http://localhost:11
13
  OLLAMA_API_KEY: str | None = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
14
  OLLAMA_NUM_CTX: int = int(os.getenv("OLLAMA_NUM_CTX", default=8192))
15
 
 
16
  myagent_args = {
17
  "provider": "litellm",
18
  "model_id": "gemini/gemini-2.0-flash-lite",
19
  # "api_base": OLLAMA_API_BASE,
20
  "planning_interval": 3,
21
- "num_ctx": OLLAMA_NUM_CTX,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  }
23
 
24
  print(f"Using args: {myagent_args}")
@@ -29,6 +64,11 @@ if __name__ == "__main__":
29
  with open(QUESTIONS_FILEPATH, "r") as f:
30
  questions = json.load(f)
31
 
32
- answers = run_agent(agent, [questions[1]])
 
 
 
 
 
33
  print("Answers:", answers)
34
  print("Finished running the agent.")
 
1
  from agents.agent import MyAgent
2
  from utils import run_agent
3
+ from smolagents import (
4
+ DuckDuckGoSearchTool,
5
+ # WikipediaSearchTool,
6
+ VisitWebpageTool,
7
+ )
8
+ from tools.text_search import TextSearch
9
+ from tools.text_splitter import text_splitter
10
+ from tools.webpage_parser import WebpageParser
11
+ from tools.parse_wikipedia_table import WikipediaParser
12
+ from tools.open_files import OpenFilesTool
13
+ from prompts.default_prompt import generate_prompt
14
+
15
 
16
  import os
17
  import json
 
25
  OLLAMA_API_KEY: str | None = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
26
  OLLAMA_NUM_CTX: int = int(os.getenv("OLLAMA_NUM_CTX", default=8192))
27
 
28
+
29
  myagent_args = {
30
  "provider": "litellm",
31
  "model_id": "gemini/gemini-2.0-flash-lite",
32
  # "api_base": OLLAMA_API_BASE,
33
  "planning_interval": 3,
34
+ "tools": [
35
+ DuckDuckGoSearchTool(),
36
+ WikipediaParser(),
37
+ VisitWebpageTool(),
38
+ TextSearch(),
39
+ text_splitter,
40
+ WebpageParser(),
41
+ OpenFilesTool(),
42
+ ],
43
+ "additional_authorized_imports": [
44
+ "pandas",
45
+ "numpy",
46
+ "datetime",
47
+ "json",
48
+ "re",
49
+ "math",
50
+ "os",
51
+ "requests",
52
+ "csv",
53
+ "urllib",
54
+ ],
55
+ "num_ctx": 8192,
56
+ "temperature": 0.2,
57
  }
58
 
59
  print(f"Using args: {myagent_args}")
 
64
  with open(QUESTIONS_FILEPATH, "r") as f:
65
  questions = json.load(f)
66
 
67
    # Build a prompt for the first question in the local fixture file.
    question = questions[0]
    question_text = question.get("question")
    file_name = question.get("file_name")
    prompt = generate_prompt(question_text, file_name)

    # NOTE(review): `prompt` is built above but never used — run_agent is
    # still invoked with the raw question dict. Presumably run_agent should
    # receive the generated prompt; verify against utils.run_agent's
    # signature before changing.
    answers = run_agent(agent, [questions[0]])
73
  print("Answers:", answers)
74
  print("Finished running the agent.")
tools/__init__.py CHANGED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import requests
3
+ from markdownify import markdownify
4
+ from requests.exceptions import RequestException
5
+ from smolagents import tool
6
+
7
+
8
@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as a markdown string.

    Args:
        url: The URL of the webpage to visit.

    Returns:
        The content of the webpage converted to Markdown, or an error message if the request fails.
    """
    try:
        # Send a GET request to the URL. A timeout is essential here: without
        # one, a single unresponsive host would hang the agent forever.
        # requests.Timeout is a RequestException subclass, so a timed-out
        # request is reported through the error-string path below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Convert the HTML content to Markdown
        markdown_content = markdownify(response.text).strip()

        # Remove multiple line breaks (3+ newlines collapse to one blank line)
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        return markdown_content

    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"
tools/open_files.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import os
3
+ import json
4
+ import csv
5
+ import openpyxl
6
+ import whisper
7
+ import requests
8
+
9
+
10
class OpenFilesTool(Tool):
    """Tool that downloads a task file on demand and returns its content as
    text. Supports plain text, JSON, CSV, XLSX, and MP3 (transcribed with
    Whisper)."""

    name = "open_files_tool"
    description = (
        "This tool opens files and returns their content as a string. "
        "It can handle text, CSV, JSON, XLSX, and MP3 file types."
    )
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The path to the file to be opened.",
        },
        "file_type": {
            "type": "string",
            "description": "The type of the file (text, csv, json, xlsx, mp3). Default is 'text'.",
            "nullable": True,
        },
    }
    output_type = "string"

    def download_file(self, file_name: str) -> None:
        """Fetch *file_name* from the scoring server unless it already exists
        locally.

        The server addresses files by the name without its extension — hence
        the ``split('.')[0]`` in the URL.
        """
        if not os.path.exists(file_name):
            url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name.split('.')[0]}"
            # Timeout so a dead server cannot hang the agent, and
            # raise_for_status so an HTTP error page is never silently saved
            # to disk as if it were the requested file's content.
            r = requests.get(url, timeout=60)
            r.raise_for_status()
            with open(file_name, "wb") as f:
                f.write(r.content)

    def open_file_as_text(self, file_name: str, filetype: str = "txt") -> str:
        """
        Opens a file and returns its content as readable text.
        Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).
        Args:
            file_name (str): The path or name of the file.
            filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Defaults to 'txt'.
        Returns:
            str: The content of the file as text, or transcribed speech if 'mp3'.
        """
        try:
            # Download inside the try block so a failed download is reported
            # via the error-string contract instead of crashing the agent step.
            self.download_file(file_name)

            if filetype == "txt":
                with open(file_name, "r", encoding="utf-8") as f:
                    return f.read()

            elif filetype == "json":
                with open(file_name, "r", encoding="utf-8") as f:
                    data = json.load(f)
                return json.dumps(data, indent=2)

            elif filetype == "csv":
                with open(file_name, "r", encoding="utf-8") as f:
                    reader = csv.reader(f)
                    rows = list(reader)
                return "\n".join([", ".join(row) for row in rows])

            elif filetype == "xlsx":
                wb = openpyxl.load_workbook(file_name, data_only=True)
                sheet = wb.active
                content = []
                for row in sheet.iter_rows(values_only=True):
                    content.append(
                        ", ".join(str(cell) if cell is not None else "" for cell in row)
                    )
                return "\n".join(content)

            elif filetype == "mp3":
                # Speech-to-text via Whisper; model load happens per call.
                w = whisper.load_model("base")
                res = w.transcribe(file_name)
                return res["text"]

            else:
                return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."

        except FileNotFoundError:
            return f"File '{file_name}' not found."
        except Exception as e:
            return f"Error opening file '{file_name}': {str(e)}"

    def forward(self, file_path: str, file_type: str = "text") -> str:
        """
        Opens a file and returns its content as a string.
        Args:
            file_path (str): The path to the file to be opened.
            file_type (str): The type of the file (text, csv, json, xlsx, mp3). Default is 'text'.
        Returns:
            str: The content of the file as a string.
        """
        # BUG FIX: the public input advertises "text" (and is nullable), but
        # open_file_as_text dispatches on "txt" — previously the default call
        # always fell through to the "Unsupported filetype" branch.
        filetype = "txt" if file_type in (None, "text") else file_type
        return self.open_file_as_text(file_path, filetype)
tools/parse_wikipedia_table.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import requests
3
+ from bs4 import BeautifulSoup, Tag
4
+
5
+
6
class WikipediaParser(Tool):
    """Tool that fetches a Wikipedia page and flattens its main content
    (headings, paragraphs, lists, and tables) into plain readable text."""

    name: str = "wikipedia_parser_tool"
    description: str = (
        "This tool parse a Wikipedia page into a clean, readable text format."
    )
    inputs: dict[str, dict[str, str]] = {
        "url": {
            "type": "string",
            "description": "The Wikipedia page url.",
        }
    }
    output_type: str = "string"

    def get_wikipedia_page(self, url: str) -> str:
        """
        Fetches a Wikipedia page and converts its main content to plain text.
        Args:
            url (str): The URL of the Wikipedia page.
        Returns:
            str: The flattened page content, or "Content not found." when the
            main content div is missing.
        """
        # A browser-like User-Agent avoids being rejected as a bot.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"  # AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
        }
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        content_div = soup.find("div", id="mw-content-text")
        if not content_div:
            return "Content not found."

        elements: list[str] = []
        h_tags: list[str] = [f"h{i}" for i in range(1, 6)]
        extra_tags: list[str] = ["p", "ul", "ol"]
        # BUG FIX: "table" must be part of the search list, otherwise the
        # table branch below can never fire and tables are silently dropped.
        html_tags: list[str] = h_tags + extra_tags + ["table"]

        for elem in content_div.find_all(html_tags):
            if elem.name in h_tags:
                elements.append("\n\n" + elem.get_text(strip=True) + "\n")
            elif elem.name == "table":
                elements.append(self.parse_wikipedia_table(elem))
            elif elem.name in extra_tags:
                elements.append(elem.get_text(strip=True))

        return "\n".join(elements)

    def parse_wikipedia_table(self, table: Tag) -> str:
        """
        Parses a Wikipedia table into a clean, readable text format.
        Args:
            table (Tag): BeautifulSoup Tag for the table.
        Returns:
            str: Formatted table as readable text.
        """
        # BUG FIX: this instance method was missing `self`, so the call
        # self.parse_wikipedia_table(elem) would have raised a TypeError.
        rows = []
        headers = []

        # Try to get headers
        thead = table.find("thead")
        if thead:
            for th in thead.find_all("th"):
                header_text = th.get_text(separator=" ", strip=True)
                headers.append(header_text)
            if headers:
                rows.append(" | ".join(headers))

        # Parse table body rows
        tbody = table.find("tbody")
        if not tbody:
            tbody = table  # fallback: some tables have no tbody explicitly

        for tr in tbody.find_all("tr"):
            cells = tr.find_all(["th", "td"])
            cell_texts = []
            for cell in cells:
                # Clean references like [7], [note 1], etc.
                for sup in cell.find_all("sup", class_="reference"):
                    sup.decompose()

                text = cell.get_text(separator=" ", strip=True)
                cell_texts.append(text)

            if cell_texts:
                row_text = " | ".join(cell_texts)
                rows.append(row_text)

        return "\n".join(rows)

    def forward(self, url: str) -> str:
        """
        Parses the Wikipedia page and returns the content as a string.
        Args:
            url (str): The URL of the Wikipedia page.
        Returns:
            str: The parsed content of the page.
        """
        page_text = self.get_wikipedia_page(url)
        return page_text
tools/video_analyzer.py CHANGED
@@ -16,6 +16,7 @@ from selenium.webdriver.support.ui import WebDriverWait
16
  from selenium.webdriver.support import expected_conditions as EC
17
  import helium
18
 
 
19
  class WebVideoAnalyzerTool(Tool):
20
  name = "web_video_analyzer"
21
  description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
@@ -31,11 +32,13 @@ class WebVideoAnalyzerTool(Tool):
31
  "duration": {
32
  "type": "integer",
33
  "description": "How many seconds of the video to analyze (default: 30)",
 
34
  },
35
  "interval": {
36
  "type": "integer",
37
  "description": "How often to take screenshots (in seconds, default: 1)",
38
- }
 
39
  }
40
  output_type = "string"
41
 
@@ -43,9 +46,9 @@ class WebVideoAnalyzerTool(Tool):
43
  """Initialize the browser with appropriate settings."""
44
  if self.driver is not None:
45
  return self.driver
46
-
47
  print("Setting up browser...")
48
-
49
  # Configure Chrome options
50
  chrome_options = webdriver.ChromeOptions()
51
  chrome_options.add_argument("--force-device-scale-factor=1")
@@ -53,7 +56,7 @@ class WebVideoAnalyzerTool(Tool):
53
  chrome_options.add_argument("--disable-pdf-viewer")
54
  chrome_options.add_argument("--window-position=0,0")
55
  chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
56
-
57
  # Initialize the driver
58
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
59
  return self.driver
@@ -63,10 +66,10 @@ class WebVideoAnalyzerTool(Tool):
63
  try:
64
  print(f"Navigating to {url}...")
65
  helium.go_to(url)
66
-
67
  # Wait for page to load
68
  time.sleep(3)
69
-
70
  # Handle YouTube-specific interactions
71
  if "youtube.com" in url:
72
  try:
@@ -75,7 +78,7 @@ class WebVideoAnalyzerTool(Tool):
75
  helium.click("Accept all")
76
  elif helium.Button("I agree").exists():
77
  helium.click("I agree")
78
-
79
  # Click on the video to ensure it's playing
80
  try:
81
  # Find the video player element
@@ -83,21 +86,23 @@ class WebVideoAnalyzerTool(Tool):
83
  EC.presence_of_element_located((By.TAG_NAME, "video"))
84
  )
85
  video_element.click()
86
-
87
  # Ensure the video is playing by trying to click the play button if visible
88
  try:
89
- play_button = self.driver.find_element(By.CLASS_NAME, "ytp-play-button")
 
 
90
  if "Play" in play_button.get_attribute("aria-label"):
91
  play_button.click()
92
  except:
93
  pass
94
-
95
  except:
96
  print("Could not locate video element to click")
97
-
98
  except Exception as e:
99
  print(f"Error during YouTube setup: {str(e)}")
100
-
101
  # General approach - try to find and click on any video element
102
  else:
103
  try:
@@ -107,11 +112,11 @@ class WebVideoAnalyzerTool(Tool):
107
  video_elements[0].click()
108
  except Exception as e:
109
  print(f"Could not find or click video element: {str(e)}")
110
-
111
  # Allow video to start
112
  time.sleep(2)
113
  return True
114
-
115
  except Exception as e:
116
  print(f"Error navigating to {url}: {str(e)}")
117
  return False
@@ -121,13 +126,15 @@ class WebVideoAnalyzerTool(Tool):
121
  try:
122
  # Try pressing Escape key to close general popups
123
  webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
124
-
125
  # YouTube-specific: try to close any visible dialog or popup
126
  if "youtube.com" in self.driver.current_url:
127
  # Try to find and click close buttons on popups
128
  try:
129
- close_buttons = self.driver.find_elements(By.CSS_SELECTOR,
130
- "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button")
 
 
131
  for button in close_buttons:
132
  button.click()
133
  except:
@@ -143,96 +150,106 @@ class WebVideoAnalyzerTool(Tool):
143
  def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
144
  """Count objects of the specified label in a screenshot."""
145
  detector = pipeline("object-detection", model="facebook/detr-resnet-50")
146
-
147
  try:
148
  # Run detection on the image
149
  results = detector(image)
150
-
151
  # Count objects matching the label
152
- object_count = sum(1 for result in results if label.lower() in result["label"].lower())
153
-
 
 
154
  # Debug: print detected classes
155
  detected_classes = [result["label"] for result in results]
156
  if detected_classes:
157
  print(f"Detected classes: {', '.join(detected_classes)}")
158
-
159
  return object_count
160
-
161
  except Exception as e:
162
  print(f"Error detecting objects in screenshot: {str(e)}")
163
  return 0
164
 
165
- def _capture_video_frames(self, duration: int = 30, interval: int = 1, label: str = "") -> List[Dict]:
 
 
166
  """Capture frames from the video at regular intervals."""
167
  results = []
168
-
169
- print(f"Starting frame capture for {duration} seconds with {interval} second intervals...")
 
 
170
  temp_dir = tempfile.mkdtemp()
171
-
172
  for seconds_elapsed in range(0, duration, interval):
173
  # Take screenshot
174
  try:
175
  print(f"Capturing frame at {seconds_elapsed} seconds...")
176
  screenshot = self._take_screenshot()
177
-
178
  # Save screenshot for debugging (optional)
179
  screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
180
  screenshot.save(screenshot_path)
181
-
182
  # Analyze screenshot
183
  object_count = self._analyze_screenshot(screenshot, label)
184
-
185
  # Store results
186
- results.append({
187
- "time": seconds_elapsed,
188
- "object_count": object_count,
189
- "screenshot_path": screenshot_path
190
- })
191
-
 
 
192
  # Wait for next interval
193
  if seconds_elapsed + interval < duration:
194
  time.sleep(interval)
195
-
196
  except Exception as e:
197
  print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")
198
-
199
  return results
200
 
201
- def forward(self, url: str, label: str, duration: int = 30, interval: int = 1) -> str:
 
 
202
  """
203
  Analyzes a video on a webpage by taking screenshots and counting objects.
204
-
205
  Args:
206
  url (str): The URL of the webpage containing the video.
207
  label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
208
  duration (int): How many seconds of the video to analyze.
209
  interval (int): How often to take screenshots (in seconds).
210
-
211
  Returns:
212
  str: A detailed report of object counts over time.
213
  """
214
  try:
215
  # Setup the browser
216
  self._setup_browser()
217
-
218
  # Navigate to the video
219
  if not self._navigate_to_video(url):
220
  return f"Error: Could not navigate to or play the video at {url}"
221
-
222
  # Close any popups or overlays
223
  self._close_popups()
224
-
225
  # Capture and analyze frames
226
  frame_results = self._capture_video_frames(duration, interval, label)
227
-
228
  # Calculate summary statistics
229
  if not frame_results:
230
  return f"Error: No frames were successfully captured and analyzed"
231
-
232
  total_objects = sum(result["object_count"] for result in frame_results)
233
  avg_objects = total_objects / len(frame_results)
234
  max_objects = max(frame_results, key=lambda x: x["object_count"])
235
-
236
  # Generate a report
237
  report = [
238
  f"# {label.title()} Count Analysis for Video",
@@ -245,22 +262,24 @@ class WebVideoAnalyzerTool(Tool):
245
  f"Average {label}s per screenshot: {avg_objects:.2f}",
246
  f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
247
  "",
248
- "## Time-based Analysis"
249
  ]
250
-
251
  # Add frame-by-frame details
252
  for result in frame_results:
253
- report.append(f"Time {result['time']} seconds: {result['object_count']} {label}s")
254
-
 
 
255
  # Clean up
256
  try:
257
  helium.kill_browser()
258
  self.driver = None
259
  except:
260
  print("Warning: Could not properly close the browser")
261
-
262
  return "\n".join(report)
263
-
264
  except Exception as e:
265
  # Ensure browser is closed on error
266
  try:
@@ -269,6 +288,5 @@ class WebVideoAnalyzerTool(Tool):
269
  self.driver = None
270
  except:
271
  pass
272
-
273
  return f"Error analyzing video: {str(e)}"
274
-
 
16
  from selenium.webdriver.support import expected_conditions as EC
17
  import helium
18
 
19
+
20
  class WebVideoAnalyzerTool(Tool):
21
  name = "web_video_analyzer"
22
  description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
 
32
  "duration": {
33
  "type": "integer",
34
  "description": "How many seconds of the video to analyze (default: 30)",
35
+ "nullable": True,
36
  },
37
  "interval": {
38
  "type": "integer",
39
  "description": "How often to take screenshots (in seconds, default: 1)",
40
+ "nullable": True,
41
+ },
42
  }
43
  output_type = "string"
44
 
 
46
  """Initialize the browser with appropriate settings."""
47
  if self.driver is not None:
48
  return self.driver
49
+
50
  print("Setting up browser...")
51
+
52
  # Configure Chrome options
53
  chrome_options = webdriver.ChromeOptions()
54
  chrome_options.add_argument("--force-device-scale-factor=1")
 
56
  chrome_options.add_argument("--disable-pdf-viewer")
57
  chrome_options.add_argument("--window-position=0,0")
58
  chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
59
+
60
  # Initialize the driver
61
  self.driver = helium.start_chrome(headless=False, options=chrome_options)
62
  return self.driver
 
66
  try:
67
  print(f"Navigating to {url}...")
68
  helium.go_to(url)
69
+
70
  # Wait for page to load
71
  time.sleep(3)
72
+
73
  # Handle YouTube-specific interactions
74
  if "youtube.com" in url:
75
  try:
 
78
  helium.click("Accept all")
79
  elif helium.Button("I agree").exists():
80
  helium.click("I agree")
81
+
82
  # Click on the video to ensure it's playing
83
  try:
84
  # Find the video player element
 
86
  EC.presence_of_element_located((By.TAG_NAME, "video"))
87
  )
88
  video_element.click()
89
+
90
  # Ensure the video is playing by trying to click the play button if visible
91
  try:
92
+ play_button = self.driver.find_element(
93
+ By.CLASS_NAME, "ytp-play-button"
94
+ )
95
  if "Play" in play_button.get_attribute("aria-label"):
96
  play_button.click()
97
  except:
98
  pass
99
+
100
  except:
101
  print("Could not locate video element to click")
102
+
103
  except Exception as e:
104
  print(f"Error during YouTube setup: {str(e)}")
105
+
106
  # General approach - try to find and click on any video element
107
  else:
108
  try:
 
112
  video_elements[0].click()
113
  except Exception as e:
114
  print(f"Could not find or click video element: {str(e)}")
115
+
116
  # Allow video to start
117
  time.sleep(2)
118
  return True
119
+
120
  except Exception as e:
121
  print(f"Error navigating to {url}: {str(e)}")
122
  return False
 
126
  try:
127
  # Try pressing Escape key to close general popups
128
  webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
129
+
130
  # YouTube-specific: try to close any visible dialog or popup
131
  if "youtube.com" in self.driver.current_url:
132
  # Try to find and click close buttons on popups
133
  try:
134
+ close_buttons = self.driver.find_elements(
135
+ By.CSS_SELECTOR,
136
+ "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button",
137
+ )
138
  for button in close_buttons:
139
  button.click()
140
  except:
 
150
  def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
151
  """Count objects of the specified label in a screenshot."""
152
  detector = pipeline("object-detection", model="facebook/detr-resnet-50")
153
+
154
  try:
155
  # Run detection on the image
156
  results = detector(image)
157
+
158
  # Count objects matching the label
159
+ object_count = sum(
160
+ 1 for result in results if label.lower() in result["label"].lower()
161
+ )
162
+
163
  # Debug: print detected classes
164
  detected_classes = [result["label"] for result in results]
165
  if detected_classes:
166
  print(f"Detected classes: {', '.join(detected_classes)}")
167
+
168
  return object_count
169
+
170
  except Exception as e:
171
  print(f"Error detecting objects in screenshot: {str(e)}")
172
  return 0
173
 
174
+ def _capture_video_frames(
175
+ self, duration: int = 30, interval: int = 1, label: str = ""
176
+ ) -> List[Dict]:
177
  """Capture frames from the video at regular intervals."""
178
  results = []
179
+
180
+ print(
181
+ f"Starting frame capture for {duration} seconds with {interval} second intervals..."
182
+ )
183
  temp_dir = tempfile.mkdtemp()
184
+
185
  for seconds_elapsed in range(0, duration, interval):
186
  # Take screenshot
187
  try:
188
  print(f"Capturing frame at {seconds_elapsed} seconds...")
189
  screenshot = self._take_screenshot()
190
+
191
  # Save screenshot for debugging (optional)
192
  screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
193
  screenshot.save(screenshot_path)
194
+
195
  # Analyze screenshot
196
  object_count = self._analyze_screenshot(screenshot, label)
197
+
198
  # Store results
199
+ results.append(
200
+ {
201
+ "time": seconds_elapsed,
202
+ "object_count": object_count,
203
+ "screenshot_path": screenshot_path,
204
+ }
205
+ )
206
+
207
  # Wait for next interval
208
  if seconds_elapsed + interval < duration:
209
  time.sleep(interval)
210
+
211
  except Exception as e:
212
  print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")
213
+
214
  return results
215
 
216
+ def forward(
217
+ self, url: str, label: str, duration: int = 30, interval: int = 1
218
+ ) -> str:
219
  """
220
  Analyzes a video on a webpage by taking screenshots and counting objects.
221
+
222
  Args:
223
  url (str): The URL of the webpage containing the video.
224
  label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
225
  duration (int): How many seconds of the video to analyze.
226
  interval (int): How often to take screenshots (in seconds).
227
+
228
  Returns:
229
  str: A detailed report of object counts over time.
230
  """
231
  try:
232
  # Setup the browser
233
  self._setup_browser()
234
+
235
  # Navigate to the video
236
  if not self._navigate_to_video(url):
237
  return f"Error: Could not navigate to or play the video at {url}"
238
+
239
  # Close any popups or overlays
240
  self._close_popups()
241
+
242
  # Capture and analyze frames
243
  frame_results = self._capture_video_frames(duration, interval, label)
244
+
245
  # Calculate summary statistics
246
  if not frame_results:
247
  return f"Error: No frames were successfully captured and analyzed"
248
+
249
  total_objects = sum(result["object_count"] for result in frame_results)
250
  avg_objects = total_objects / len(frame_results)
251
  max_objects = max(frame_results, key=lambda x: x["object_count"])
252
+
253
  # Generate a report
254
  report = [
255
  f"# {label.title()} Count Analysis for Video",
 
262
  f"Average {label}s per screenshot: {avg_objects:.2f}",
263
  f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
264
  "",
265
+ "## Time-based Analysis",
266
  ]
267
+
268
  # Add frame-by-frame details
269
  for result in frame_results:
270
+ report.append(
271
+ f"Time {result['time']} seconds: {result['object_count']} {label}s"
272
+ )
273
+
274
  # Clean up
275
  try:
276
  helium.kill_browser()
277
  self.driver = None
278
  except:
279
  print("Warning: Could not properly close the browser")
280
+
281
  return "\n".join(report)
282
+
283
  except Exception as e:
284
  # Ensure browser is closed on error
285
  try:
 
288
  self.driver = None
289
  except:
290
  pass
291
+
292
  return f"Error analyzing video: {str(e)}"
 
tools/web_utils.py CHANGED
@@ -5,6 +5,7 @@ from selenium.webdriver.common.by import By
5
 
6
  driver = None
7
 
 
8
  @tool
9
  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
10
  """
@@ -16,19 +17,23 @@ def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
16
  if driver:
17
  elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
18
  if nth_result > len(elements):
19
- raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
 
 
20
  result = f"Found {len(elements)} matches for '{text}'."
21
  elem = elements[nth_result - 1]
22
  driver.execute_script("arguments[0].scrollIntoView(true);", elem)
23
  result += f"Focused on element {nth_result} of {len(elements)}"
24
  return result
25
 
 
26
  @tool
27
  def go_back() -> None:
28
  """Goes back to previous page."""
29
  if driver:
30
  driver.back()
31
 
 
32
  @tool
33
  def close_popups() -> str:
34
  """
 
5
 
6
  driver = None
7
 
8
+
9
  @tool
10
  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
11
  """
 
17
  if driver:
18
  elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
19
  if nth_result > len(elements):
20
+ raise Exception(
21
+ f"Match n°{nth_result} not found (only {len(elements)} matches found)"
22
+ )
23
  result = f"Found {len(elements)} matches for '{text}'."
24
  elem = elements[nth_result - 1]
25
  driver.execute_script("arguments[0].scrollIntoView(true);", elem)
26
  result += f"Focused on element {nth_result} of {len(elements)}"
27
  return result
28
 
29
+
30
  @tool
31
  def go_back() -> None:
32
  """Goes back to previous page."""
33
  if driver:
34
  driver.back()
35
 
36
+
37
  @tool
38
  def close_popups() -> str:
39
  """
tools/webpage_parser.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ from smolagents import Tool
3
+
4
+
5
+ class WebpageParser(Tool):
6
+ name: str = "webpage_parser_tool"
7
+ description: str = (
8
+ "This tool parses elements from HTML to make them easily searchable."
9
+ )
10
+ inputs: dict[str, dict[str, str]] = {
11
+ "html_string": {
12
+ "type": "string",
13
+ "description": "The HTML content as a string.",
14
+ },
15
+ }
16
+ output_type: str = "array"
17
+
18
+ def forward(self, html_string: str) -> list[str]:
19
+ """
20
+ Parses the HTML string and returns all elements as an array.
21
+ """
22
+ # Create a BeautifulSoup object
23
+ soup = BeautifulSoup(html_string, "html.parser")
24
+
25
+ # Extract all elements as strings
26
+ elements = [str(element) for element in soup.find_all()]
27
+
28
+ return elements
utils/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
  import requests
2
  from smolagents import CodeAgent
3
  from tqdm import tqdm
 
4
 
5
  DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
6
 
@@ -50,12 +51,15 @@ def run_agent(agent: CodeAgent, questions: list[dict]) -> list[str]:
50
  for question in tqdm(questions, desc="Running agent"):
51
  task_id = question.get("task_id")
52
  question_text = question.get("question")
 
 
 
53
  if not task_id or question_text is None:
54
  print(f"Skipping item with missing task_id or question: {question}")
55
  continue
56
 
57
  try:
58
- answer = agent(question_text)
59
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
60
  results_log.append(
61
  {
 
1
  import requests
2
  from smolagents import CodeAgent
3
  from tqdm import tqdm
4
+ from prompts.default_prompt import generate_prompt
5
 
6
  DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
7
 
 
51
  for question in tqdm(questions, desc="Running agent"):
52
  task_id = question.get("task_id")
53
  question_text = question.get("question")
54
+ file_name = question.get("file_name")
55
+ prompt = generate_prompt(question_text, file_name)
56
+
57
  if not task_id or question_text is None:
58
  print(f"Skipping item with missing task_id or question: {question}")
59
  continue
60
 
61
  try:
62
+ answer = agent(prompt)
63
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
64
  results_log.append(
65
  {
uv.lock CHANGED
@@ -342,6 +342,15 @@ wheels = [
342
  { url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
343
  ]
344
 
 
 
 
 
 
 
 
 
 
345
  [[package]]
346
  name = "fastapi"
347
  version = "0.115.12"
@@ -544,13 +553,17 @@ name = "hf-agents-gaia-agent"
544
  version = "0.1.0"
545
  source = { virtual = "." }
546
  dependencies = [
 
547
  { name = "ffmpeg" },
548
  { name = "gradio", extra = ["oauth"] },
549
  { name = "helium" },
550
  { name = "litellm" },
 
551
  { name = "numpy" },
552
  { name = "openai" },
 
553
  { name = "opencv-python" },
 
554
  { name = "pandas" },
555
  { name = "pillow" },
556
  { name = "python-dotenv" },
@@ -567,13 +580,17 @@ dependencies = [
567
 
568
  [package.metadata]
569
  requires-dist = [
 
570
  { name = "ffmpeg", specifier = ">=1.4" },
571
  { name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
572
  { name = "helium", specifier = ">=5.1.1" },
573
  { name = "litellm", specifier = "==1.67.1" },
 
574
  { name = "numpy", specifier = ">=2.2.5" },
575
  { name = "openai", specifier = ">=1.76.0" },
 
576
  { name = "opencv-python", specifier = ">=4.11.0.86" },
 
577
  { name = "pandas", specifier = ">=2.2.3" },
578
  { name = "pillow", specifier = ">=11.2.1" },
579
  { name = "python-dotenv", specifier = ">=1.1.0" },
@@ -760,6 +777,24 @@ wheels = [
760
  { url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
761
  ]
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  [[package]]
764
  name = "lxml"
765
  version = "5.4.0"
@@ -874,6 +909,15 @@ wheels = [
874
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
875
  ]
876
 
 
 
 
 
 
 
 
 
 
877
  [[package]]
878
  name = "mpmath"
879
  version = "1.3.0"
@@ -952,6 +996,28 @@ wheels = [
952
  { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
953
  ]
954
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955
  [[package]]
956
  name = "numpy"
957
  version = "2.2.5"
@@ -1142,6 +1208,21 @@ wheels = [
1142
  { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
1143
  ]
1144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1145
  [[package]]
1146
  name = "opencv-python"
1147
  version = "4.11.0.86"
@@ -1159,6 +1240,18 @@ wheels = [
1159
  { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
1160
  ]
1161
 
 
 
 
 
 
 
 
 
 
 
 
 
1162
  [[package]]
1163
  name = "orjson"
1164
  version = "3.10.16"
 
342
  { url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
343
  ]
344
 
345
+ [[package]]
346
+ name = "et-xmlfile"
347
+ version = "2.0.0"
348
+ source = { registry = "https://pypi.org/simple" }
349
+ sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
350
+ wheels = [
351
+ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
352
+ ]
353
+
354
  [[package]]
355
  name = "fastapi"
356
  version = "0.115.12"
 
553
  version = "0.1.0"
554
  source = { virtual = "." }
555
  dependencies = [
556
+ { name = "beautifulsoup4" },
557
  { name = "ffmpeg" },
558
  { name = "gradio", extra = ["oauth"] },
559
  { name = "helium" },
560
  { name = "litellm" },
561
+ { name = "markdownify" },
562
  { name = "numpy" },
563
  { name = "openai" },
564
+ { name = "openai-whisper" },
565
  { name = "opencv-python" },
566
+ { name = "openpyxl" },
567
  { name = "pandas" },
568
  { name = "pillow" },
569
  { name = "python-dotenv" },
 
580
 
581
  [package.metadata]
582
  requires-dist = [
583
+ { name = "beautifulsoup4", specifier = ">=4.13.4" },
584
  { name = "ffmpeg", specifier = ">=1.4" },
585
  { name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
586
  { name = "helium", specifier = ">=5.1.1" },
587
  { name = "litellm", specifier = "==1.67.1" },
588
+ { name = "markdownify", specifier = ">=1.1.0" },
589
  { name = "numpy", specifier = ">=2.2.5" },
590
  { name = "openai", specifier = ">=1.76.0" },
591
+ { name = "openai-whisper", specifier = ">=20240930" },
592
  { name = "opencv-python", specifier = ">=4.11.0.86" },
593
+ { name = "openpyxl", specifier = ">=3.1.5" },
594
  { name = "pandas", specifier = ">=2.2.3" },
595
  { name = "pillow", specifier = ">=11.2.1" },
596
  { name = "python-dotenv", specifier = ">=1.1.0" },
 
777
  { url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
778
  ]
779
 
780
+ [[package]]
781
+ name = "llvmlite"
782
+ version = "0.44.0"
783
+ source = { registry = "https://pypi.org/simple" }
784
+ sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880 }
785
+ wheels = [
786
+ { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297 },
787
+ { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105 },
788
+ { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901 },
789
+ { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247 },
790
+ { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380 },
791
+ { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306 },
792
+ { url = "https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090 },
793
+ { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904 },
794
+ { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245 },
795
+ { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193 },
796
+ ]
797
+
798
  [[package]]
799
  name = "lxml"
800
  version = "5.4.0"
 
909
  { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
910
  ]
911
 
912
+ [[package]]
913
+ name = "more-itertools"
914
+ version = "10.7.0"
915
+ source = { registry = "https://pypi.org/simple" }
916
+ sdist = { url = "https://files.pythonhosted.org/packages/ce/a0/834b0cebabbfc7e311f30b46c8188790a37f89fc8d756660346fe5abfd09/more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3", size = 127671 }
917
+ wheels = [
918
+ { url = "https://files.pythonhosted.org/packages/2b/9f/7ba6f94fc1e9ac3d2b853fdff3035fb2fa5afbed898c4a72b8a020610594/more_itertools-10.7.0-py3-none-any.whl", hash = "sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e", size = 65278 },
919
+ ]
920
+
921
  [[package]]
922
  name = "mpmath"
923
  version = "1.3.0"
 
996
  { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
997
  ]
998
 
999
+ [[package]]
1000
+ name = "numba"
1001
+ version = "0.61.2"
1002
+ source = { registry = "https://pypi.org/simple" }
1003
+ dependencies = [
1004
+ { name = "llvmlite" },
1005
+ { name = "numpy" },
1006
+ ]
1007
+ sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615 }
1008
+ wheels = [
1009
+ { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626 },
1010
+ { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287 },
1011
+ { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928 },
1012
+ { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115 },
1013
+ { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929 },
1014
+ { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785 },
1015
+ { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289 },
1016
+ { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918 },
1017
+ { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056 },
1018
+ { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846 },
1019
+ ]
1020
+
1021
  [[package]]
1022
  name = "numpy"
1023
  version = "2.2.5"
 
1208
  { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
1209
  ]
1210
 
1211
+ [[package]]
1212
+ name = "openai-whisper"
1213
+ version = "20240930"
1214
+ source = { registry = "https://pypi.org/simple" }
1215
+ dependencies = [
1216
+ { name = "more-itertools" },
1217
+ { name = "numba" },
1218
+ { name = "numpy" },
1219
+ { name = "tiktoken" },
1220
+ { name = "torch" },
1221
+ { name = "tqdm" },
1222
+ { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or sys_platform == 'linux2'" },
1223
+ ]
1224
+ sdist = { url = "https://files.pythonhosted.org/packages/f5/77/952ca71515f81919bd8a6a4a3f89a27b09e73880cebf90957eda8f2f8545/openai-whisper-20240930.tar.gz", hash = "sha256:b7178e9c1615576807a300024f4daa6353f7e1a815dac5e38c33f1ef055dd2d2", size = 800544 }
1225
+
1226
  [[package]]
1227
  name = "opencv-python"
1228
  version = "4.11.0.86"
 
1240
  { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
1241
  ]
1242
 
1243
+ [[package]]
1244
+ name = "openpyxl"
1245
+ version = "3.1.5"
1246
+ source = { registry = "https://pypi.org/simple" }
1247
+ dependencies = [
1248
+ { name = "et-xmlfile" },
1249
+ ]
1250
+ sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
1251
+ wheels = [
1252
+ { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
1253
+ ]
1254
+
1255
  [[package]]
1256
  name = "orjson"
1257
  version = "3.10.16"