File size: 2,827 Bytes
ef26eb0
 
 
 
 
 
 
 
 
 
 
6553c90
 
 
 
 
504d406
6553c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504d406
6553c90
 
 
 
 
 
 
 
 
 
 
504d406
6553c90
504d406
 
 
 
 
6553c90
504d406
 
 
 
 
 
 
 
 
 
 
 
6553c90
 
 
 
504d406
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from typing import Any, Optional
from smolagents.tools import Tool
import requests
import markdownify
import smolagents
import playwright.sync_api as playwright
import re
import yaml
from smolagents import CodeAgent, HfApiModel
from tools.final_answer import FinalAnswerTool

class VisitWebpageTool(Tool):
    """Fetch a webpage as Markdown and report browser-side device information.

    ``forward`` downloads the page with ``requests``, converts the HTML to
    Markdown, and separately launches a headless Chromium instance through
    Playwright to read a few ``navigator`` / ``screen`` properties. The device
    information is read from an ``about:blank`` page in that local browser,
    not from the visited URL.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given URL, reads its content as a markdown string, and extracts device information."
    inputs = {'url': {'type': 'string', 'description': 'The URL of the webpage to visit.'}}
    # "dict" is not one of the smolagents authorized output type names;
    # "object" is the one used for dictionary results.
    output_type = "object"

    def forward(self, url: str) -> dict:
        """Visit ``url`` and return its Markdown content plus device information.

        Args:
            url: The URL of the webpage to visit.

        Returns:
            A dict with two keys:
            ``"webpage_content"`` - the page converted to Markdown and passed
            through ``truncate_content`` with a limit of 10000, or a
            human-readable error message if the fetch failed;
            ``"device_info"`` - the dict produced by ``_collect_device_info``.

        Raises:
            ImportError: If ``markdownify``, ``requests`` or
                ``smolagents.utils`` cannot be imported.
        """
        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException
            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Collapse runs of three or more line breaks into a single blank line
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            webpage_content = truncate_content(markdown_content, 10000)

        # Timeout is a RequestException subclass, so it must be handled first.
        except requests.exceptions.Timeout:
            webpage_content = "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            webpage_content = f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Best effort: the tool reports failures as text instead of raising.
            webpage_content = f"An unexpected error occurred: {str(e)}"

        return {
            "webpage_content": webpage_content,
            "device_info": self._collect_device_info(),
        }

    def _collect_device_info(self) -> dict:
        """Read navigator/screen properties from a headless Chromium page.

        Returns:
            A dict with ``userAgent``, ``platform``, ``language``,
            ``screenWidth`` and ``screenHeight`` as evaluated on an
            ``about:blank`` page, or ``{"error": <message>}`` when Playwright
            reports an error (for example when the browser cannot be launched).
        """
        # Imported locally, like the other dependencies of this tool. The
        # module-level alias `playwright` already IS `playwright.sync_api`, so
        # `playwright.sync_api.sync_playwright` would raise AttributeError.
        from playwright.sync_api import Error as PlaywrightError
        from playwright.sync_api import sync_playwright

        script = """
            () => ({
                userAgent: navigator.userAgent,
                platform: navigator.platform,
                language: navigator.language,
                screenWidth: window.screen.width,
                screenHeight: window.screen.height
            })
            """

        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    page = browser.new_page()
                    page.goto("about:blank")
                    return page.evaluate(script)
                finally:
                    # Close the browser even if page creation or evaluation fails.
                    browser.close()
        except PlaywrightError as e:
            # Mirror the fetch path: degrade to an error payload so the
            # already-fetched webpage content is still returned to the caller.
            return {"error": f"Could not collect device information: {e}"}

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; all arguments are ignored."""
        self.is_initialized = False