Csplk committed on
Commit
da7c6fd
·
verified ·
1 Parent(s): ba75b32

Create visit_webpage_tool.py

Browse files
Files changed (1) hide show
  1. visit_webpage_tool.py +35 -0
visit_webpage_tool.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Tool
2
+ import requests
3
+ from markdownify import markdownify as md
4
+ from requests.exceptions import RequestException
5
+ import re
6
+
7
class VisitWebpageTool(Tool):
    """Tool that downloads a web page and returns its content as Markdown.

    The class attributes below describe the tool (its name, a description,
    the input schema and the output type).
    NOTE(review): presumably these are read by the ``Tool`` base class /
    agent framework -- confirm against the ``transformers`` version in use.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given URL and returns its content as a markdown string."
    # A single input, ``url``, declared as text.
    inputs = {
        "url": {
            "type": "text",
            "description": "The URL of the webpage to visit.",
        }
    }
    output_type = "text"

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return the page converted to Markdown.

        Args:
            url: The URL of the webpage to visit.

        Returns:
            The page content as a Markdown string with surrounding whitespace
            stripped and runs of three or more newlines collapsed to two.
            On failure no exception is raised; a human-readable error
            message is returned instead.
        """
        try:
            # Always bound the request: without a timeout an unresponsive
            # server would block the caller indefinitely. A timeout raises
            # requests.exceptions.Timeout, which is a RequestException and
            # is therefore reported through the handler below.
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = md(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return markdown_content

        except RequestException as e:
            return f"Error fetching the webpage: {e}"
        except Exception as e:
            # Deliberate catch-all: any failure is reported back as text
            # rather than propagated to the caller.
            return f"An unexpected error occurred: {e}"