Spaces:
Sleeping
Sleeping
Update tools/getDeviceInfo.py
Browse files- tools/getDeviceInfo.py +40 -16
tools/getDeviceInfo.py
CHANGED
@@ -1,26 +1,47 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
16 |
-
str: A string containing device details such as user agent, platform, and screen resolution.
|
17 |
-
"""
|
18 |
with playwright.sync_api.sync_playwright() as p:
|
19 |
browser = p.chromium.launch(headless=True)
|
20 |
page = browser.new_page()
|
21 |
|
22 |
script = """
|
23 |
-
() =>
|
24 |
userAgent: navigator.userAgent,
|
25 |
platform: navigator.platform,
|
26 |
language: navigator.language,
|
@@ -33,7 +54,10 @@ class GetDeviceInfoTool(Tool):
|
|
33 |
device_info = page.evaluate(script)
|
34 |
browser.close()
|
35 |
|
36 |
-
return
|
|
|
|
|
|
|
37 |
|
38 |
def __init__(self, *args, **kwargs):
|
39 |
self.is_initialized = False
|
|
|
1 |
+
class VisitWebpageTool(Tool):
|
2 |
+
name = "visit_webpage"
|
3 |
+
description = "Visits a webpage at the given URL, reads its content as a markdown string, and extracts device information."
|
4 |
+
inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
|
5 |
+
output_type = "dict"
|
6 |
|
7 |
+
def forward(self, url: str) -> dict:
|
8 |
+
try:
|
9 |
+
import requests
|
10 |
+
from markdownify import markdownify
|
11 |
+
from requests.exceptions import RequestException
|
12 |
+
from smolagents.utils import truncate_content
|
13 |
+
except ImportError as e:
|
14 |
+
raise ImportError(
|
15 |
+
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
16 |
+
) from e
|
17 |
+
|
18 |
+
try:
|
19 |
+
# Send a GET request to the URL with a 20-second timeout
|
20 |
+
response = requests.get(url, timeout=20)
|
21 |
+
response.raise_for_status() # Raise an exception for bad status codes
|
22 |
+
|
23 |
+
# Convert the HTML content to Markdown
|
24 |
+
markdown_content = markdownify(response.text).strip()
|
25 |
|
26 |
+
# Remove multiple line breaks
|
27 |
+
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
28 |
+
|
29 |
+
webpage_content = truncate_content(markdown_content, 10000)
|
30 |
+
|
31 |
+
except requests.exceptions.Timeout:
|
32 |
+
webpage_content = "The request timed out. Please try again later or check the URL."
|
33 |
+
except RequestException as e:
|
34 |
+
webpage_content = f"Error fetching the webpage: {str(e)}"
|
35 |
+
except Exception as e:
|
36 |
+
webpage_content = f"An unexpected error occurred: {str(e)}"
|
37 |
|
38 |
+
# Extract device information using Playwright
|
|
|
|
|
39 |
with playwright.sync_api.sync_playwright() as p:
|
40 |
browser = p.chromium.launch(headless=True)
|
41 |
page = browser.new_page()
|
42 |
|
43 |
script = """
|
44 |
+
() => ({
|
45 |
userAgent: navigator.userAgent,
|
46 |
platform: navigator.platform,
|
47 |
language: navigator.language,
|
|
|
54 |
device_info = page.evaluate(script)
|
55 |
browser.close()
|
56 |
|
57 |
+
return {
|
58 |
+
"webpage_content": webpage_content,
|
59 |
+
"device_info": device_info
|
60 |
+
}
|
61 |
|
62 |
def __init__(self, *args, **kwargs):
|
63 |
self.is_initialized = False
|