Commit
·
968a67a
1
Parent(s):
9ff7774
add start
Browse files- agents/agent.py +30 -14
- agents/video_agent.py +70 -0
- app.py +1 -0
- prompts/__init__.py +0 -0
- prompts/helium.py +45 -0
- pyproject.toml +3 -0
- tools/video_analyzer.py +232 -157
- tools/web_utils.py +39 -0
- uv.lock +119 -0
agents/agent.py
CHANGED
@@ -3,27 +3,46 @@ from smolagents import (
|
|
3 |
DuckDuckGoSearchTool,
|
4 |
WikipediaSearchTool,
|
5 |
LiteLLMModel,
|
|
|
6 |
)
|
7 |
from tools.text_search import TextSearch
|
8 |
from tools.text_splitter import text_splitter
|
9 |
-
from tools.video_analyzer import
|
10 |
-
|
11 |
|
12 |
class MyAgent:
|
13 |
def __init__(
|
14 |
self,
|
15 |
provider: str = "litellm",
|
16 |
-
model_id: str = "
|
17 |
api_base: str | None = None,
|
18 |
api_key: str | None = None,
|
19 |
planning_interval: int = 3,
|
20 |
num_ctx: int = 8192,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
):
|
22 |
"""
|
23 |
Initializes the agent depending on the provider and model ID.
|
24 |
Args:
|
25 |
provider (str): The provider of the model (e.g., "litellm", "huggingface").
|
26 |
model_id (str): The ID of the model to be used.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
Returns:
|
28 |
None: None
|
29 |
"""
|
@@ -33,30 +52,27 @@ class MyAgent:
|
|
33 |
self.api_key = api_key
|
34 |
self.planning_interval = planning_interval
|
35 |
self.num_ctx = num_ctx
|
|
|
36 |
|
37 |
model = LiteLLMModel(
|
38 |
model_id=self.model_id,
|
39 |
api_base=self.api_base,
|
40 |
api_key=self.api_key,
|
41 |
num_ctx=self.num_ctx,
|
42 |
-
add_base_tools=
|
|
|
43 |
)
|
44 |
|
45 |
-
tools = [
|
46 |
-
DuckDuckGoSearchTool(), # Search tool for web queries
|
47 |
-
WikipediaSearchTool(), # Search tool for Wikipedia queries
|
48 |
-
TextSearch(), # Search tool for text queries
|
49 |
-
text_splitter, # Text splitter tool for breaking down large texts
|
50 |
-
# into manageable lists.
|
51 |
-
YouTubeObjectCounterTool(), # Tool for analyzing YouTube videos
|
52 |
-
]
|
53 |
-
|
54 |
# Initialize the agent with the specified provider and model ID
|
55 |
if provider == "litellm":
|
56 |
self.agent = CodeAgent(
|
57 |
model=model,
|
58 |
tools=tools,
|
59 |
-
planning_interval=planning_interval,
|
|
|
|
|
|
|
|
|
60 |
)
|
61 |
else:
|
62 |
raise ValueError(f"Unsupported provider: {provider}")
|
|
|
3 |
DuckDuckGoSearchTool,
|
4 |
WikipediaSearchTool,
|
5 |
LiteLLMModel,
|
6 |
+
Tool,
|
7 |
)
|
8 |
from tools.text_search import TextSearch
|
9 |
from tools.text_splitter import text_splitter
|
10 |
+
from tools.video_analyzer import WebVideoAnalyzerTool
|
11 |
+
from typing import Callable
|
12 |
|
13 |
class MyAgent:
|
14 |
def __init__(
|
15 |
self,
|
16 |
provider: str = "litellm",
|
17 |
+
model_id: str = "gemini/gemini-2.0-flash-lite",
|
18 |
api_base: str | None = None,
|
19 |
api_key: str | None = None,
|
20 |
planning_interval: int = 3,
|
21 |
num_ctx: int = 8192,
|
22 |
+
tools: list[Tool] = [],
|
23 |
+
add_base_tools: bool = True,
|
24 |
+
temperature: float = 0.2,
|
25 |
+
additional_authorized_imports: list[str] = [],
|
26 |
+
step_callbacks: list[Callable] = [],
|
27 |
+
max_steps: int = 20,
|
28 |
+
verbosity_level: int = 2,
|
29 |
):
|
30 |
"""
|
31 |
Initializes the agent depending on the provider and model ID.
|
32 |
Args:
|
33 |
provider (str): The provider of the model (e.g., "litellm", "huggingface").
|
34 |
model_id (str): The ID of the model to be used.
|
35 |
+
tools (list[Tool]): The tools to be used by the agent.
|
36 |
+
api_base (str | None): The base URL of the API.
|
37 |
+
api_key (str | None): The API key.
|
38 |
+
planning_interval (int): The interval for planning.
|
39 |
+
num_ctx (int): The number of context tokens.
|
40 |
+
add_base_tools (bool): Whether to add base tools.
|
41 |
+
temperature (float): The temperature for the model.
|
42 |
+
additional_authorized_imports (list[str]): The additional authorized imports.
|
43 |
+
step_callbacks (list[Callable]): The step callbacks.
|
44 |
+
max_steps (int): The maximum steps.
|
45 |
+
verbosity_level (int): The verbosity level.
|
46 |
Returns:
|
47 |
None: None
|
48 |
"""
|
|
|
52 |
self.api_key = api_key
|
53 |
self.planning_interval = planning_interval
|
54 |
self.num_ctx = num_ctx
|
55 |
+
self.temperature = temperature
|
56 |
|
57 |
model = LiteLLMModel(
|
58 |
model_id=self.model_id,
|
59 |
api_base=self.api_base,
|
60 |
api_key=self.api_key,
|
61 |
num_ctx=self.num_ctx,
|
62 |
+
add_base_tools=add_base_tools,
|
63 |
+
temperature=self.temperature,
|
64 |
)
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
# Initialize the agent with the specified provider and model ID
|
67 |
if provider == "litellm":
|
68 |
self.agent = CodeAgent(
|
69 |
model=model,
|
70 |
tools=tools,
|
71 |
+
planning_interval=self.planning_interval,
|
72 |
+
additional_authorized_imports=additional_authorized_imports,
|
73 |
+
step_callbacks=step_callbacks,
|
74 |
+
max_steps=max_steps,
|
75 |
+
verbosity_level=verbosity_level,
|
76 |
)
|
77 |
else:
|
78 |
raise ValueError(f"Unsupported provider: {provider}")
|
agents/video_agent.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from io import BytesIO
|
2 |
+
from time import sleep
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
|
6 |
+
# Add the parent directory to the Python path so modules can be found
|
7 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
8 |
+
|
9 |
+
import helium
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
from PIL import Image
|
12 |
+
from selenium import webdriver
|
13 |
+
|
14 |
+
from smolagents import CodeAgent
|
15 |
+
from smolagents.agents import ActionStep
|
16 |
+
from agents.agent import MyAgent
|
17 |
+
from prompts.helium import HELIUM_PROMPT
|
18 |
+
|
19 |
+
# Load variables from a local .env file into the process environment.
load_dotenv()

# Command-line switches applied to the Chrome instance that helium drives.
_CHROME_FLAGS = (
    "--force-device-scale-factor=1",
    "--window-size=1000,1350",
    "--disable-pdf-viewer",
    "--window-position=0,0",
)

# Configure Chrome options
chrome_options = webdriver.ChromeOptions()
for _flag in _CHROME_FLAGS:
    chrome_options.add_argument(_flag)

# Initialize the browser (visible window, not headless)
driver = helium.start_chrome(headless=False, options=chrome_options)
|
30 |
+
|
31 |
+
|
32 |
+
def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
    """Step callback: attach a browser screenshot and the current URL to a step.

    Args:
        memory_step: The step that has just finished; its
            ``observations_images`` and ``observations`` are updated in place.
        agent: The running agent; screenshots stored on steps that are two or
            more steps older than ``memory_step`` are dropped from its memory.

    Returns:
        None. Does nothing (beyond the initial pause) when helium reports no
        active driver.
    """
    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot

    driver = helium.get_driver()
    if driver is None:
        # Without a browser there is neither a screenshot nor a URL to record;
        # bail out instead of dereferencing None below.
        return

    current_step = memory_step.step_number

    # Remove previous screenshots for lean processing: only the images of the
    # most recent steps are kept in memory.
    for earlier_step in agent.memory.steps:
        if isinstance(earlier_step, ActionStep) and earlier_step.step_number <= current_step - 2:
            earlier_step.observations_images = None

    image = Image.open(BytesIO(driver.get_screenshot_as_png()))
    print(f"Captured a browser screenshot: {image.size} pixels")
    memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists

    # Update observations with current URL
    url_info = f"Current url: {driver.current_url}"
    if memory_step.observations is None:
        memory_step.observations = url_info
    else:
        memory_step.observations = memory_step.observations + "\n" + url_info
|
50 |
+
|
51 |
+
# The task given to the agent; HELIUM_PROMPT is appended to it at call time.
search_request = """
Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident.
"""

try:
    # NOTE(review): HELIUM_PROMPT mentions a `close_popups` tool, but no
    # `tools` are passed here — confirm whether browser tools should be
    # registered with the agent.
    video_agent = MyAgent(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.0,
        add_base_tools=False,
        additional_authorized_imports=["helium"],
        step_callbacks=[save_screenshot],
        max_steps=20,
        verbosity_level=2,
    )

    # Pre-import helium inside the agent's Python executor so generated code
    # can call go_to(), click(), ... directly, as HELIUM_PROMPT promises.
    video_agent.agent.python_executor("from helium import *", video_agent.agent.state)

    agent_output = video_agent(search_request + HELIUM_PROMPT)
    print("Final output:")
    print(agent_output)
finally:
    # The browser was started at import time; close it even when the agent
    # run fails so no Chrome window/process is left behind.
    helium.kill_browser()
|
app.py
CHANGED
@@ -36,6 +36,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
36 |
api_key=os.getenv("GEMINI_API_KEY"),
|
37 |
planning_interval=3,
|
38 |
num_ctx=8192,
|
|
|
39 |
)
|
40 |
|
41 |
except Exception as e:
|
|
|
36 |
api_key=os.getenv("GEMINI_API_KEY"),
|
37 |
planning_interval=3,
|
38 |
num_ctx=8192,
|
39 |
+
temperature=0.2,
|
40 |
)
|
41 |
|
42 |
except Exception as e:
|
prompts/__init__.py
ADDED
File without changes
|
prompts/helium.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Instructions appended to the user's request so the model knows how to drive
# the browser with helium. The text is sent to the model verbatim, so every
# character (including the ```py ... ```<end_code> fences) is part of the
# program's behavior.
HELIUM_PROMPT = """
You can use helium to access websites. Don't bother about the helium driver, it's already managed.
We've already ran "from helium import *"
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>

You can directly click clickable elements by inputting the text that appears on them.
Code:
```py
click("Top products")
```<end_code>

If it's a link:
Code:
```py
click(Link("Top products"))
```<end_code>

If you try to interact with an element and it's not found, you'll get a LookupError.
In general stop your action after each button click to see what happens on your screenshot.
Never try to login in a page.

To scroll up or down, use scroll_down or scroll_up with as an argument the number of pixels to scroll from.
Code:
```py
scroll_down(num_pixels=1200) # This will scroll one viewport down
```<end_code>

When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>

You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
    click('I accept')
```<end_code>
"""
|
pyproject.toml
CHANGED
@@ -7,13 +7,16 @@ requires-python = ">=3.12"
|
|
7 |
dependencies = [
|
8 |
"ffmpeg>=1.4",
|
9 |
"gradio[oauth]>=5.27.0",
|
|
|
10 |
"litellm==1.67.1",
|
11 |
"numpy>=2.2.5",
|
12 |
"openai>=1.76.0",
|
13 |
"opencv-python>=4.11.0.86",
|
14 |
"pandas>=2.2.3",
|
|
|
15 |
"python-dotenv>=1.1.0",
|
16 |
"requests>=2.32.3",
|
|
|
17 |
"smolagents[litellm]>=1.14.0",
|
18 |
"timm>=1.0.15",
|
19 |
"torch>=2.7.0",
|
|
|
7 |
dependencies = [
|
8 |
"ffmpeg>=1.4",
|
9 |
"gradio[oauth]>=5.27.0",
|
10 |
+
"helium>=5.1.1",
|
11 |
"litellm==1.67.1",
|
12 |
"numpy>=2.2.5",
|
13 |
"openai>=1.76.0",
|
14 |
"opencv-python>=4.11.0.86",
|
15 |
"pandas>=2.2.3",
|
16 |
+
"pillow>=11.2.1",
|
17 |
"python-dotenv>=1.1.0",
|
18 |
"requests>=2.32.3",
|
19 |
+
"selenium>=4.31.0",
|
20 |
"smolagents[litellm]>=1.14.0",
|
21 |
"timm>=1.0.15",
|
22 |
"torch>=2.7.0",
|
tools/video_analyzer.py
CHANGED
@@ -1,199 +1,274 @@
|
|
1 |
from smolagents import Tool
|
2 |
import os
|
3 |
-
import
|
4 |
import tempfile
|
5 |
-
from yt_dlp import YoutubeDL
|
6 |
from transformers import pipeline
|
7 |
-
from typing import
|
8 |
from PIL import Image
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
inputs = {
|
15 |
"url": {
|
16 |
"type": "string",
|
17 |
-
"description": "The URL of the
|
18 |
},
|
19 |
"label": {
|
20 |
"type": "string",
|
21 |
"description": "The type of object to count (e.g., 'bird', 'person', 'car', 'dog'). Use common object names recognized by standard object detection models.",
|
22 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
}
|
24 |
output_type = "string"
|
25 |
|
26 |
-
def
|
27 |
-
"""
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
try:
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
except Exception as e:
|
46 |
-
|
47 |
-
|
48 |
-
raise RuntimeError(error_msg)
|
49 |
-
|
50 |
-
def _count_objects_in_frame(self, frame, label: str):
|
51 |
-
"""Counts objects of specified label in a single frame using the object detection model."""
|
52 |
|
|
|
|
|
53 |
try:
|
54 |
-
#
|
55 |
-
|
56 |
-
|
57 |
-
#
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
# Count objects matching the label
|
67 |
-
object_count = sum(
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
return object_count
|
|
|
71 |
except Exception as e:
|
72 |
-
print(f"Error detecting objects in
|
73 |
return 0
|
74 |
|
75 |
-
def
|
76 |
-
"""
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
)
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
if object_count > max_objects:
|
114 |
-
max_objects = object_count
|
115 |
-
max_objects_frame = frame_idx
|
116 |
-
|
117 |
-
frame_results.append(
|
118 |
-
{
|
119 |
-
"frame": frame_idx,
|
120 |
-
"time": time_point,
|
121 |
-
"object_count": object_count,
|
122 |
-
}
|
123 |
-
)
|
124 |
-
|
125 |
-
frame_idx += 1
|
126 |
-
|
127 |
-
# Release resources
|
128 |
-
cap.release()
|
129 |
-
|
130 |
-
# Calculate statistics
|
131 |
-
avg_objects_per_frame = (
|
132 |
-
total_objects / len(frame_results) if frame_results else 0
|
133 |
-
)
|
134 |
-
max_objects_time = max_objects_frame / fps if max_objects_frame else 0
|
135 |
-
|
136 |
-
# Clean up the temporary file
|
137 |
-
try:
|
138 |
-
os.remove(video_path)
|
139 |
-
print(f"Deleted temporary video file: {video_path}")
|
140 |
-
except Exception as e:
|
141 |
-
print(
|
142 |
-
f"Warning: Failed to delete temporary video file: {video_path} | {str(e)}"
|
143 |
-
)
|
144 |
-
|
145 |
-
return {
|
146 |
-
"frame_results": frame_results,
|
147 |
-
"total_frames_analyzed": len(frame_results),
|
148 |
-
"video_duration": duration,
|
149 |
-
"fps": fps,
|
150 |
-
"total_frames": frame_count,
|
151 |
-
"average_objects_per_analyzed_frame": avg_objects_per_frame,
|
152 |
-
"max_objects_in_single_frame": max_objects,
|
153 |
-
"max_objects_frame": max_objects_frame,
|
154 |
-
"max_objects_time": max_objects_time,
|
155 |
-
"label": label,
|
156 |
-
}
|
157 |
-
|
158 |
-
def forward(self, url: str, label: str) -> str:
|
159 |
"""
|
160 |
-
Analyzes a
|
161 |
-
|
162 |
Args:
|
163 |
-
url (str): The URL of the
|
164 |
label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
|
165 |
-
|
|
|
|
|
166 |
Returns:
|
167 |
-
str: A detailed report of object counts
|
168 |
"""
|
169 |
-
|
170 |
try:
|
171 |
-
#
|
172 |
-
|
173 |
-
|
174 |
-
#
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
# Generate a report
|
178 |
report = [
|
179 |
-
f"# {label.title()} Count Analysis for
|
180 |
f"Video URL: {url}",
|
181 |
-
f"
|
182 |
-
f"
|
183 |
-
|
184 |
"## Summary",
|
185 |
-
f"
|
186 |
-
f"
|
|
|
|
|
|
|
187 |
]
|
188 |
-
|
189 |
# Add frame-by-frame details
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
196 |
return "\n".join(report)
|
197 |
-
|
198 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
return f"Error analyzing video: {str(e)}"
|
|
|
|
1 |
from smolagents import Tool
|
2 |
import os
|
3 |
+
import time
|
4 |
import tempfile
|
|
|
5 |
from transformers import pipeline
|
6 |
+
from typing import List, Dict
|
7 |
from PIL import Image
|
8 |
+
import io
|
9 |
+
|
10 |
+
# Import required browser automation libraries
|
11 |
+
from selenium import webdriver
|
12 |
+
from selenium.webdriver.common.by import By
|
13 |
+
from selenium.webdriver.common.keys import Keys
|
14 |
+
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
15 |
+
from selenium.webdriver.support.ui import WebDriverWait
|
16 |
+
from selenium.webdriver.support import expected_conditions as EC
|
17 |
+
import helium
|
18 |
+
|
19 |
+
class WebVideoAnalyzerTool(Tool):
    """Count objects in a web-hosted video by screenshotting the browser.

    The tool opens the page in a Chrome window driven by helium/selenium,
    tries to start playback, then takes one screenshot every ``interval``
    seconds for ``duration`` seconds and runs an object-detection pipeline
    (``facebook/detr-resnet-50``) on each screenshot.

    NOTE(review): the whole browser window is captured, not only the video
    element, so anything else visible on the page may be detected as well.
    """

    name = "web_video_analyzer"
    description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the web page containing the video to analyze.",
        },
        "label": {
            "type": "string",
            "description": "The type of object to count (e.g., 'bird', 'person', 'car', 'dog'). Use common object names recognized by standard object detection models.",
        },
        # `duration` and `interval` have defaults in forward(); smolagents
        # expects such optional inputs to be declared nullable.
        "duration": {
            "type": "integer",
            "description": "How many seconds of the video to analyze (default: 30)",
            "nullable": True,
        },
        "interval": {
            "type": "integer",
            "description": "How often to take screenshots (in seconds, default: 1)",
            "nullable": True,
        },
    }
    output_type = "string"

    # Class-level defaults so both attributes exist before the first call;
    # instances overwrite them once a browser / model has been created.
    driver = None
    _detector = None

    def _setup_browser(self):
        """Start Chrome through helium, or return the already running driver."""
        if self.driver is not None:
            return self.driver

        print("Setting up browser...")

        # Configure Chrome options
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--force-device-scale-factor=1")
        chrome_options.add_argument("--window-size=1280,720")
        chrome_options.add_argument("--disable-pdf-viewer")
        chrome_options.add_argument("--window-position=0,0")
        # Let videos start without a user click where the site allows it.
        chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")

        # Initialize the driver
        self.driver = helium.start_chrome(headless=False, options=chrome_options)
        return self.driver

    def _shutdown_browser(self) -> None:
        """Close the browser if one is open and forget the driver."""
        if self.driver is None:
            return
        try:
            helium.kill_browser()
        except Exception as e:
            print(f"Warning: Could not properly close the browser: {str(e)}")
        finally:
            # Never reuse a driver whose shutdown was attempted.
            self.driver = None

    def _start_youtube_playback(self) -> None:
        """Best-effort: dismiss YouTube's cookie dialog and start the player."""
        try:
            # Accept cookies if prompted
            if helium.Button("Accept all").exists():
                helium.click("Accept all")
            elif helium.Button("I agree").exists():
                helium.click("I agree")
        except Exception as e:
            print(f"Error during YouTube setup: {str(e)}")
            return

        try:
            # Find the video player element and click it
            video_element = WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "video"))
            )
            video_element.click()
        except Exception as e:
            print(f"Could not locate video element to click: {str(e)}")
            return

        try:
            # If the player button still offers "Play", the video is paused
            # (possibly by the click above), so press it.
            play_button = self.driver.find_element(By.CLASS_NAME, "ytp-play-button")
            aria_label = play_button.get_attribute("aria-label") or ""
            if "Play" in aria_label:
                play_button.click()
        except Exception as e:
            print(f"Could not use the YouTube play button: {str(e)}")

    def _start_generic_playback(self) -> None:
        """Best-effort: click the first <video> element on a non-YouTube page."""
        try:
            video_elements = self.driver.find_elements(By.TAG_NAME, "video")
            if video_elements:
                video_elements[0].click()
        except Exception as e:
            print(f"Could not find or click video element: {str(e)}")

    def _navigate_to_video(self, url: str) -> bool:
        """Open ``url`` and try to start playback.

        Returns:
            bool: True once the page was opened (playback itself is only
            best-effort), False if navigation raised.
        """
        try:
            print(f"Navigating to {url}...")
            helium.go_to(url)

            # Wait for page to load
            time.sleep(3)

            # Handle YouTube-specific interactions (including youtu.be short links)
            if "youtube.com" in url or "youtu.be" in url:
                self._start_youtube_playback()
            else:
                self._start_generic_playback()

            # Allow video to start
            time.sleep(2)
            return True

        except Exception as e:
            print(f"Error navigating to {url}: {str(e)}")
            return False

    def _close_popups(self):
        """Attempt to close any popups or overlays; failures are only logged."""
        try:
            # Try pressing Escape key to close general popups
            webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()

            # YouTube-specific: try to close any visible ad overlay / skip ads
            if "youtube.com" in self.driver.current_url:
                try:
                    close_buttons = self.driver.find_elements(
                        By.CSS_SELECTOR,
                        "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button",
                    )
                    for button in close_buttons:
                        button.click()
                except Exception as e:
                    print(f"Could not click an ad close/skip button: {str(e)}")
        except Exception as e:
            print(f"Error closing popups: {str(e)}")

    def _take_screenshot(self) -> Image.Image:
        """Take a screenshot of the current browser window."""
        png_bytes = self.driver.get_screenshot_as_png()
        return Image.open(io.BytesIO(png_bytes))

    def _get_detector(self):
        """Return the object-detection pipeline, building it on first use only."""
        if self._detector is None:
            self._detector = pipeline("object-detection", model="facebook/detr-resnet-50")
        return self._detector

    def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
        """Count detections whose class name contains ``label`` (case-insensitive).

        Returns:
            int: Number of matching detections, or 0 if detection raised.
        """
        # Built outside the try block on purpose: a model that cannot be
        # loaded should surface as a failed frame, not as a count of zero.
        detector = self._get_detector()

        try:
            # Run detection on the image
            results = detector(image)

            # Count objects matching the label
            wanted = label.lower()
            object_count = sum(1 for result in results if wanted in result["label"].lower())

            # Debug: print detected classes
            detected_classes = [result["label"] for result in results]
            if detected_classes:
                print(f"Detected classes: {', '.join(detected_classes)}")

            return object_count

        except Exception as e:
            print(f"Error detecting objects in screenshot: {str(e)}")
            return 0

    def _capture_video_frames(self, duration: int = 30, interval: int = 1, label: str = "") -> List[Dict]:
        """Screenshot the browser every ``interval`` seconds and analyze each shot.

        Args:
            duration: Total number of seconds to cover.
            interval: Seconds between screenshots; must be positive.
            label: Object class to count in each screenshot.

        Returns:
            List[Dict]: One dict per successfully processed frame with the
            keys ``time``, ``object_count`` and ``screenshot_path``.

        Raises:
            ValueError: If ``interval`` is not positive.
        """
        if interval <= 0:
            raise ValueError("interval must be a positive number of seconds")

        results = []

        print(f"Starting frame capture for {duration} seconds with {interval} second intervals...")
        # Screenshots are kept on disk for debugging; the directory is not removed.
        temp_dir = tempfile.mkdtemp()

        for seconds_elapsed in range(0, duration, interval):
            try:
                print(f"Capturing frame at {seconds_elapsed} seconds...")
                screenshot = self._take_screenshot()

                # Save screenshot for debugging (optional). JPEG has no alpha
                # channel and PNG screenshots may carry one, so convert first.
                screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
                screenshot.convert("RGB").save(screenshot_path)

                # Analyze screenshot
                object_count = self._analyze_screenshot(screenshot, label)

                # Store results
                results.append({
                    "time": seconds_elapsed,
                    "object_count": object_count,
                    "screenshot_path": screenshot_path,
                })

            except Exception as e:
                print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")

            # Wait for next interval. Kept outside the try block so a failed
            # frame does not make the loop race through the remaining ones.
            if seconds_elapsed + interval < duration:
                time.sleep(interval)

        return results

    def forward(self, url: str, label: str, duration: int = 30, interval: int = 1) -> str:
        """
        Analyzes a video on a webpage by taking screenshots and counting objects.

        Args:
            url (str): The URL of the webpage containing the video.
            label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
            duration (int): How many seconds of the video to analyze.
            interval (int): How often to take screenshots (in seconds).

        Returns:
            str: A detailed report of object counts over time, or a message
            starting with "Error" if the analysis could not be completed.
        """
        # Both inputs are nullable, so None may be passed explicitly.
        duration = 30 if duration is None else duration
        interval = 1 if interval is None else interval

        try:
            # Reject unusable settings before a browser window is opened.
            if duration <= 0 or interval <= 0:
                raise ValueError("duration and interval must be positive integers")

            # Setup the browser
            self._setup_browser()

            # Navigate to the video
            if not self._navigate_to_video(url):
                return f"Error: Could not navigate to or play the video at {url}"

            # Close any popups or overlays
            self._close_popups()

            # Capture and analyze frames
            frame_results = self._capture_video_frames(duration, interval, label)

            # Calculate summary statistics
            if not frame_results:
                return "Error: No frames were successfully captured and analyzed"

            total_objects = sum(result["object_count"] for result in frame_results)
            avg_objects = total_objects / len(frame_results)
            max_objects = max(frame_results, key=lambda x: x["object_count"])

            # Generate a report
            report = [
                f"# {label.title()} Count Analysis for Video",
                f"Video URL: {url}",
                f"Analysis duration: {duration} seconds",
                f"Screenshots taken: {len(frame_results)} (every {interval} second(s))",
                "",
                "## Summary",
                f"Total {label}s detected: {total_objects}",
                f"Average {label}s per screenshot: {avg_objects:.2f}",
                f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
                "",
                "## Time-based Analysis",
            ]

            # Add frame-by-frame details
            for result in frame_results:
                report.append(f"Time {result['time']} seconds: {result['object_count']} {label}s")

            return "\n".join(report)

        except Exception as e:
            return f"Error analyzing video: {str(e)}"

        finally:
            # Runs on success, on the early error returns and on exceptions,
            # so the browser never outlives the call.
            self._shutdown_browser()
|
274 |
+
|
tools/web_utils.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from smolagents import tool
|
2 |
+
from selenium import webdriver
|
3 |
+
from selenium.webdriver.common.keys import Keys
|
4 |
+
from selenium.webdriver.common.by import By
|
5 |
+
|
6 |
+
# Selenium WebDriver shared by the tools in this module. Every tool checks it
# and does nothing while it is falsy.
# NOTE(review): nothing visible in this module assigns a real driver — confirm
# where it is supposed to be set.
driver = None
|
7 |
+
|
8 |
+
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """
    Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
    Args:
        text: The text to search for
        nth_result: Which occurrence to jump to (default: 1)
    """
    if not driver:
        # Give the agent something readable instead of an undefined result.
        return "No browser session is available, so the page could not be searched."

    if nth_result < 1:
        # 0 or a negative value would otherwise index from the end of the
        # match list and silently jump to the wrong occurrence.
        raise ValueError(f"nth_result must be 1 or greater (got {nth_result})")

    # XPath 1.0 string literals have no escape character: choose a quote style
    # the text does not contain, or stitch the pieces together with concat()
    # when it contains both kinds of quote.
    if "'" not in text:
        xpath_literal = f"'{text}'"
    elif '"' not in text:
        xpath_literal = f'"{text}"'
    else:
        pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
        xpath_literal = f"concat({pieces})"

    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), {xpath_literal})]")
    if nth_result > len(elements):
        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")

    result = f"Found {len(elements)} matches for '{text}'."
    elem = elements[nth_result - 1]
    # Scroll the chosen match into view so it shows up in the next screenshot.
    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
    result += f"Focused on element {nth_result} of {len(elements)}"
    return result
|
25 |
+
|
26 |
+
@tool
def go_back() -> None:
    """Goes back to previous page."""
    if not driver:
        # No browser session: nothing to navigate.
        return
    driver.back()
|
31 |
+
|
32 |
+
@tool
def close_popups() -> str:
    """
    Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
    This does not work on cookie consent banners.
    """
    if not driver:
        return "No browser session is available, so no pop-up could be closed."

    # Pressing Escape dismisses most modal dialogs.
    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    # The tool is declared to return a string, so report what was done
    # instead of returning None.
    return "Sent the Escape key to the page to dismiss any open pop-up."
|
uv.lock
CHANGED
@@ -530,6 +530,15 @@ wheels = [
|
|
530 |
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
|
531 |
]
|
532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
[[package]]
|
534 |
name = "hf-agents-gaia-agent"
|
535 |
version = "0.1.0"
|
@@ -537,13 +546,16 @@ source = { virtual = "." }
|
|
537 |
dependencies = [
|
538 |
{ name = "ffmpeg" },
|
539 |
{ name = "gradio", extra = ["oauth"] },
|
|
|
540 |
{ name = "litellm" },
|
541 |
{ name = "numpy" },
|
542 |
{ name = "openai" },
|
543 |
{ name = "opencv-python" },
|
544 |
{ name = "pandas" },
|
|
|
545 |
{ name = "python-dotenv" },
|
546 |
{ name = "requests" },
|
|
|
547 |
{ name = "smolagents", extra = ["litellm"] },
|
548 |
{ name = "timm" },
|
549 |
{ name = "torch" },
|
@@ -557,13 +569,16 @@ dependencies = [
|
|
557 |
requires-dist = [
|
558 |
{ name = "ffmpeg", specifier = ">=1.4" },
|
559 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
|
|
|
560 |
{ name = "litellm", specifier = "==1.67.1" },
|
561 |
{ name = "numpy", specifier = ">=2.2.5" },
|
562 |
{ name = "openai", specifier = ">=1.76.0" },
|
563 |
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
564 |
{ name = "pandas", specifier = ">=2.2.3" },
|
|
|
565 |
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
566 |
{ name = "requests", specifier = ">=2.32.3" },
|
|
|
567 |
{ name = "smolagents", extras = ["litellm"], specifier = ">=1.14.0" },
|
568 |
{ name = "timm", specifier = ">=1.0.15" },
|
569 |
{ name = "torch", specifier = ">=2.7.0" },
|
@@ -1180,6 +1195,18 @@ wheels = [
|
|
1180 |
{ url = "https://files.pythonhosted.org/packages/81/9c/b66ce9245ff319df2c3278acd351a3f6145ef34b4a2d7f4b0f739368370f/orjson-3.10.16-cp313-cp313-win_amd64.whl", hash = "sha256:fe0a145e96d51971407cb8ba947e63ead2aa915db59d6631a355f5f2150b56b7", size = 133954 },
|
1181 |
]
|
1182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1183 |
[[package]]
|
1184 |
name = "packaging"
|
1185 |
version = "25.0"
|
@@ -1421,6 +1448,15 @@ wheels = [
|
|
1421 |
{ url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
|
1422 |
]
|
1423 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1424 |
[[package]]
|
1425 |
name = "python-dateutil"
|
1426 |
version = "2.9.0.post0"
|
@@ -1672,6 +1708,23 @@ wheels = [
|
|
1672 |
{ url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878 },
|
1673 |
]
|
1674 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1675 |
[[package]]
|
1676 |
name = "semantic-version"
|
1677 |
version = "2.10.0"
|
@@ -1741,6 +1794,15 @@ wheels = [
|
|
1741 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
1742 |
]
|
1743 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1744 |
[[package]]
|
1745 |
name = "soupsieve"
|
1746 |
version = "2.7"
|
@@ -1948,6 +2010,37 @@ wheels = [
|
|
1948 |
{ url = "https://files.pythonhosted.org/packages/a9/b6/5257d04ae327b44db31f15cce39e6020cc986333c715660b1315a9724d82/transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83", size = 10383940 },
|
1949 |
]
|
1950 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1951 |
[[package]]
|
1952 |
name = "triton"
|
1953 |
version = "3.3.0"
|
@@ -2015,6 +2108,11 @@ wheels = [
|
|
2015 |
{ url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 },
|
2016 |
]
|
2017 |
|
|
|
|
|
|
|
|
|
|
|
2018 |
[[package]]
|
2019 |
name = "uvicorn"
|
2020 |
version = "0.34.2"
|
@@ -2028,6 +2126,15 @@ wheels = [
|
|
2028 |
{ url = "https://files.pythonhosted.org/packages/b1/4b/4cef6ce21a2aaca9d852a6e84ef4f135d99fcd74fa75105e2fc0c8308acd/uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403", size = 62483 },
|
2029 |
]
|
2030 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2031 |
[[package]]
|
2032 |
name = "websockets"
|
2033 |
version = "15.0.1"
|
@@ -2068,6 +2175,18 @@ dependencies = [
|
|
2068 |
]
|
2069 |
sdist = { url = "https://files.pythonhosted.org/packages/b9/aa/2e35be124dfc7e581480705f912040172f6570cc12e68a245ba9258c32ef/wikipedia_api-0.8.1.tar.gz", hash = "sha256:b31e93b3f5407c1a1ba413ed7326a05379a3c270df6cf6a211aca67a14c5658b", size = 19934 }
|
2070 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2071 |
[[package]]
|
2072 |
name = "yarl"
|
2073 |
version = "1.20.0"
|
|
|
530 |
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
|
531 |
]
|
532 |
|
533 |
+
[[package]]
|
534 |
+
name = "helium"
|
535 |
+
version = "5.1.1"
|
536 |
+
source = { registry = "https://pypi.org/simple" }
|
537 |
+
dependencies = [
|
538 |
+
{ name = "selenium" },
|
539 |
+
]
|
540 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d1/71/6f2bef5db8741467848f2b2c7f7818df44234df0de0917428a16da3f6e81/helium-5.1.1.tar.gz", hash = "sha256:8232b6597d24b435cda4e18a95ae883ff0bdcdbff70f98a6cb41133864d2d493", size = 40494 }
|
541 |
+
|
542 |
[[package]]
|
543 |
name = "hf-agents-gaia-agent"
|
544 |
version = "0.1.0"
|
|
|
546 |
dependencies = [
|
547 |
{ name = "ffmpeg" },
|
548 |
{ name = "gradio", extra = ["oauth"] },
|
549 |
+
{ name = "helium" },
|
550 |
{ name = "litellm" },
|
551 |
{ name = "numpy" },
|
552 |
{ name = "openai" },
|
553 |
{ name = "opencv-python" },
|
554 |
{ name = "pandas" },
|
555 |
+
{ name = "pillow" },
|
556 |
{ name = "python-dotenv" },
|
557 |
{ name = "requests" },
|
558 |
+
{ name = "selenium" },
|
559 |
{ name = "smolagents", extra = ["litellm"] },
|
560 |
{ name = "timm" },
|
561 |
{ name = "torch" },
|
|
|
569 |
requires-dist = [
|
570 |
{ name = "ffmpeg", specifier = ">=1.4" },
|
571 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
|
572 |
+
{ name = "helium", specifier = ">=5.1.1" },
|
573 |
{ name = "litellm", specifier = "==1.67.1" },
|
574 |
{ name = "numpy", specifier = ">=2.2.5" },
|
575 |
{ name = "openai", specifier = ">=1.76.0" },
|
576 |
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
577 |
{ name = "pandas", specifier = ">=2.2.3" },
|
578 |
+
{ name = "pillow", specifier = ">=11.2.1" },
|
579 |
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
580 |
{ name = "requests", specifier = ">=2.32.3" },
|
581 |
+
{ name = "selenium", specifier = ">=4.31.0" },
|
582 |
{ name = "smolagents", extras = ["litellm"], specifier = ">=1.14.0" },
|
583 |
{ name = "timm", specifier = ">=1.0.15" },
|
584 |
{ name = "torch", specifier = ">=2.7.0" },
|
|
|
1195 |
{ url = "https://files.pythonhosted.org/packages/81/9c/b66ce9245ff319df2c3278acd351a3f6145ef34b4a2d7f4b0f739368370f/orjson-3.10.16-cp313-cp313-win_amd64.whl", hash = "sha256:fe0a145e96d51971407cb8ba947e63ead2aa915db59d6631a355f5f2150b56b7", size = 133954 },
|
1196 |
]
|
1197 |
|
1198 |
+
[[package]]
|
1199 |
+
name = "outcome"
|
1200 |
+
version = "1.3.0.post0"
|
1201 |
+
source = { registry = "https://pypi.org/simple" }
|
1202 |
+
dependencies = [
|
1203 |
+
{ name = "attrs" },
|
1204 |
+
]
|
1205 |
+
sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060 }
|
1206 |
+
wheels = [
|
1207 |
+
{ url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692 },
|
1208 |
+
]
|
1209 |
+
|
1210 |
[[package]]
|
1211 |
name = "packaging"
|
1212 |
version = "25.0"
|
|
|
1448 |
{ url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
|
1449 |
]
|
1450 |
|
1451 |
+
[[package]]
|
1452 |
+
name = "pysocks"
|
1453 |
+
version = "1.7.1"
|
1454 |
+
source = { registry = "https://pypi.org/simple" }
|
1455 |
+
sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429 }
|
1456 |
+
wheels = [
|
1457 |
+
{ url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725 },
|
1458 |
+
]
|
1459 |
+
|
1460 |
[[package]]
|
1461 |
name = "python-dateutil"
|
1462 |
version = "2.9.0.post0"
|
|
|
1708 |
{ url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878 },
|
1709 |
]
|
1710 |
|
1711 |
+
[[package]]
|
1712 |
+
name = "selenium"
|
1713 |
+
version = "4.31.0"
|
1714 |
+
source = { registry = "https://pypi.org/simple" }
|
1715 |
+
dependencies = [
|
1716 |
+
{ name = "certifi" },
|
1717 |
+
{ name = "trio" },
|
1718 |
+
{ name = "trio-websocket" },
|
1719 |
+
{ name = "typing-extensions" },
|
1720 |
+
{ name = "urllib3", extra = ["socks"] },
|
1721 |
+
{ name = "websocket-client" },
|
1722 |
+
]
|
1723 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e0/bf/642cce8b5a9edad8e4880fdefbeb24f69bec2086b1121c63f883c412b797/selenium-4.31.0.tar.gz", hash = "sha256:441cffc436a2e6659fe3cfb012692435652efd38b0d368d16f661a5db47825f5", size = 855418 }
|
1724 |
+
wheels = [
|
1725 |
+
{ url = "https://files.pythonhosted.org/packages/32/53/212db779d2481b0a8428365960596f8d5a4d482ae12c441d0507fd54aaf2/selenium-4.31.0-py3-none-any.whl", hash = "sha256:7b8b8d5e424d7133cb7aa656263b19ac505ec26d65c0f921a696e7e2c5ccd95b", size = 9350584 },
|
1726 |
+
]
|
1727 |
+
|
1728 |
[[package]]
|
1729 |
name = "semantic-version"
|
1730 |
version = "2.10.0"
|
|
|
1794 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
|
1795 |
]
|
1796 |
|
1797 |
+
[[package]]
|
1798 |
+
name = "sortedcontainers"
|
1799 |
+
version = "2.4.0"
|
1800 |
+
source = { registry = "https://pypi.org/simple" }
|
1801 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 }
|
1802 |
+
wheels = [
|
1803 |
+
{ url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 },
|
1804 |
+
]
|
1805 |
+
|
1806 |
[[package]]
|
1807 |
name = "soupsieve"
|
1808 |
version = "2.7"
|
|
|
2010 |
{ url = "https://files.pythonhosted.org/packages/a9/b6/5257d04ae327b44db31f15cce39e6020cc986333c715660b1315a9724d82/transformers-4.51.3-py3-none-any.whl", hash = "sha256:fd3279633ceb2b777013234bbf0b4f5c2d23c4626b05497691f00cfda55e8a83", size = 10383940 },
|
2011 |
]
|
2012 |
|
2013 |
+
[[package]]
|
2014 |
+
name = "trio"
|
2015 |
+
version = "0.30.0"
|
2016 |
+
source = { registry = "https://pypi.org/simple" }
|
2017 |
+
dependencies = [
|
2018 |
+
{ name = "attrs" },
|
2019 |
+
{ name = "cffi", marker = "(implementation_name != 'pypy' and os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (implementation_name != 'pypy' and os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" },
|
2020 |
+
{ name = "idna" },
|
2021 |
+
{ name = "outcome" },
|
2022 |
+
{ name = "sniffio" },
|
2023 |
+
{ name = "sortedcontainers" },
|
2024 |
+
]
|
2025 |
+
sdist = { url = "https://files.pythonhosted.org/packages/01/c1/68d582b4d3a1c1f8118e18042464bb12a7c1b75d64d75111b297687041e3/trio-0.30.0.tar.gz", hash = "sha256:0781c857c0c81f8f51e0089929a26b5bb63d57f927728a5586f7e36171f064df", size = 593776 }
|
2026 |
+
wheels = [
|
2027 |
+
{ url = "https://files.pythonhosted.org/packages/69/8e/3f6dfda475ecd940e786defe6df6c500734e686c9cd0a0f8ef6821e9b2f2/trio-0.30.0-py3-none-any.whl", hash = "sha256:3bf4f06b8decf8d3cf00af85f40a89824669e2d033bb32469d34840edcfc22a5", size = 499194 },
|
2028 |
+
]
|
2029 |
+
|
2030 |
+
[[package]]
|
2031 |
+
name = "trio-websocket"
|
2032 |
+
version = "0.12.2"
|
2033 |
+
source = { registry = "https://pypi.org/simple" }
|
2034 |
+
dependencies = [
|
2035 |
+
{ name = "outcome" },
|
2036 |
+
{ name = "trio" },
|
2037 |
+
{ name = "wsproto" },
|
2038 |
+
]
|
2039 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d1/3c/8b4358e81f2f2cfe71b66a267f023a91db20a817b9425dd964873796980a/trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae", size = 33549 }
|
2040 |
+
wheels = [
|
2041 |
+
{ url = "https://files.pythonhosted.org/packages/c7/19/eb640a397bba49ba49ef9dbe2e7e5c04202ba045b6ce2ec36e9cadc51e04/trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6", size = 21221 },
|
2042 |
+
]
|
2043 |
+
|
2044 |
[[package]]
|
2045 |
name = "triton"
|
2046 |
version = "3.3.0"
|
|
|
2108 |
{ url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 },
|
2109 |
]
|
2110 |
|
2111 |
+
[package.optional-dependencies]
|
2112 |
+
socks = [
|
2113 |
+
{ name = "pysocks" },
|
2114 |
+
]
|
2115 |
+
|
2116 |
[[package]]
|
2117 |
name = "uvicorn"
|
2118 |
version = "0.34.2"
|
|
|
2126 |
{ url = "https://files.pythonhosted.org/packages/b1/4b/4cef6ce21a2aaca9d852a6e84ef4f135d99fcd74fa75105e2fc0c8308acd/uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403", size = 62483 },
|
2127 |
]
|
2128 |
|
2129 |
+
[[package]]
|
2130 |
+
name = "websocket-client"
|
2131 |
+
version = "1.8.0"
|
2132 |
+
source = { registry = "https://pypi.org/simple" }
|
2133 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 }
|
2134 |
+
wheels = [
|
2135 |
+
{ url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
|
2136 |
+
]
|
2137 |
+
|
2138 |
[[package]]
|
2139 |
name = "websockets"
|
2140 |
version = "15.0.1"
|
|
|
2175 |
]
|
2176 |
sdist = { url = "https://files.pythonhosted.org/packages/b9/aa/2e35be124dfc7e581480705f912040172f6570cc12e68a245ba9258c32ef/wikipedia_api-0.8.1.tar.gz", hash = "sha256:b31e93b3f5407c1a1ba413ed7326a05379a3c270df6cf6a211aca67a14c5658b", size = 19934 }
|
2177 |
|
2178 |
+
[[package]]
|
2179 |
+
name = "wsproto"
|
2180 |
+
version = "1.2.0"
|
2181 |
+
source = { registry = "https://pypi.org/simple" }
|
2182 |
+
dependencies = [
|
2183 |
+
{ name = "h11" },
|
2184 |
+
]
|
2185 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425 }
|
2186 |
+
wheels = [
|
2187 |
+
{ url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226 },
|
2188 |
+
]
|
2189 |
+
|
2190 |
[[package]]
|
2191 |
name = "yarl"
|
2192 |
version = "1.20.0"
|