SilviuMatei committed on
Commit
6553c90
·
verified ·
1 Parent(s): 2a86392

Update tools/getDeviceInfo.py

Browse files
Files changed (1) hide show
  1. tools/getDeviceInfo.py +40 -16
tools/getDeviceInfo.py CHANGED
@@ -1,26 +1,47 @@
1
- from typing import Any
2
- from smolagents.tools import Tool
3
- import playwright.sync_api as playwright
 
 
4
 
5
- class GetDeviceInfoTool(Tool):
6
- name = "get_device_info"
7
- description = "Opens a browser and uses JavaScript to retrieve device information silently."
8
- inputs = {}
9
- output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def forward(self) -> str:
12
- """
13
- Opens a headless browser using Playwright and executes JavaScript to silently retrieve device information.
 
 
 
 
 
 
 
 
14
 
15
- Returns:
16
- str: A string containing device details such as user agent, platform, and screen resolution.
17
- """
18
  with playwright.sync_api.sync_playwright() as p:
19
  browser = p.chromium.launch(headless=True)
20
  page = browser.new_page()
21
 
22
  script = """
23
- () => JSON.stringify({
24
  userAgent: navigator.userAgent,
25
  platform: navigator.platform,
26
  language: navigator.language,
@@ -33,7 +54,10 @@ class GetDeviceInfoTool(Tool):
33
  device_info = page.evaluate(script)
34
  browser.close()
35
 
36
- return device_info
 
 
 
37
 
38
  def __init__(self, *args, **kwargs):
39
  self.is_initialized = False
 
1
+ class VisitWebpageTool(Tool):
2
+ name = "visit_webpage"
3
+ description = "Visits a webpage at the given URL, reads its content as a markdown string, and extracts device information."
4
+ inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
5
+ output_type = "dict"
6
 
7
+ def forward(self, url: str) -> dict:
8
+ try:
9
+ import requests
10
+ from markdownify import markdownify
11
+ from requests.exceptions import RequestException
12
+ from smolagents.utils import truncate_content
13
+ except ImportError as e:
14
+ raise ImportError(
15
+ "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
16
+ ) from e
17
+
18
+ try:
19
+ # Send a GET request to the URL with a 20-second timeout
20
+ response = requests.get(url, timeout=20)
21
+ response.raise_for_status() # Raise an exception for bad status codes
22
+
23
+ # Convert the HTML content to Markdown
24
+ markdown_content = markdownify(response.text).strip()
25
 
26
+ # Remove multiple line breaks
27
+ markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
28
+
29
+ webpage_content = truncate_content(markdown_content, 10000)
30
+
31
+ except requests.exceptions.Timeout:
32
+ webpage_content = "The request timed out. Please try again later or check the URL."
33
+ except RequestException as e:
34
+ webpage_content = f"Error fetching the webpage: {str(e)}"
35
+ except Exception as e:
36
+ webpage_content = f"An unexpected error occurred: {str(e)}"
37
 
38
+ # Extract device information using Playwright
 
 
39
  with playwright.sync_api.sync_playwright() as p:
40
  browser = p.chromium.launch(headless=True)
41
  page = browser.new_page()
42
 
43
  script = """
44
+ () => ({
45
  userAgent: navigator.userAgent,
46
  platform: navigator.platform,
47
  language: navigator.language,
 
54
  device_info = page.evaluate(script)
55
  browser.close()
56
 
57
+ return {
58
+ "webpage_content": webpage_content,
59
+ "device_info": device_info
60
+ }
61
 
62
  def __init__(self, *args, **kwargs):
63
  self.is_initialized = False