class VisitWebpageTool(Tool):
    """Tool that fetches a webpage as Markdown and reports browser device info.

    ``forward`` returns a dict with two keys:

    * ``"webpage_content"``: the page converted to Markdown and truncated, or
      a human-readable error message if the fetch failed.
    * ``"device_info"``: values read from ``navigator`` / ``window.screen`` in
      a headless Chromium page opened on ``about:blank`` (i.e. it describes
      the local headless browser, not the visited URL).
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given URL, reads its content as a markdown string, and extracts device information."
    inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
    # NOTE(review): the tool framework may only accept a fixed set of type
    # names (e.g. "object" rather than "dict") -- confirm against the
    # installed smolagents version before changing.
    output_type = "dict"

    def forward(self, url: str) -> dict:
        """Fetch ``url`` and collect headless-browser device information.

        Args:
            url: The URL of the webpage to visit.

        Returns:
            A dict with ``"webpage_content"`` (Markdown text or an error
            message) and ``"device_info"`` (dict with ``userAgent``,
            ``platform``, ``language``, ``screenWidth``, ``screenHeight``).

        Raises:
            ImportError: If ``requests``, ``markdownify``, ``smolagents`` or
                ``playwright`` cannot be imported.
            Exception: Errors raised by Playwright while launching the
                browser or evaluating the script are propagated.
        """
        # Imports are kept local to the method, matching the existing style of
        # this tool; `re` was previously used without being imported here.
        import re

        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            from playwright.sync_api import sync_playwright
        except ImportError as e:
            raise ImportError(
                "You must install package `playwright` to run this tool: for instance run `pip install playwright` and then `playwright install chromium`."
            ) from e

        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Collapse runs of three or more line breaks into a single blank line
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            webpage_content = truncate_content(markdown_content, 10000)
        except requests.exceptions.Timeout:
            # Must precede RequestException: Timeout is one of its subclasses.
            webpage_content = "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            webpage_content = f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Deliberate best-effort boundary: report the failure as content
            # instead of aborting the tool call.
            webpage_content = f"An unexpected error occurred: {str(e)}"

        # Extract device information using Playwright
        script = """
            () => ({
                userAgent: navigator.userAgent,
                platform: navigator.platform,
                language: navigator.language,
                screenWidth: window.screen.width,
                screenHeight: window.screen.height
            })
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto("about:blank")
                device_info = page.evaluate(script)
            finally:
                # Always release the browser process, even if navigation or
                # script evaluation fails.
                browser.close()

        return {
            "webpage_content": webpage_content,
            "device_info": device_info,
        }

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but not used; the
        # instance only records that it has not been initialized yet.
        self.is_initialized = False