Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,40 @@ from bs4 import BeautifulSoup
|
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
11 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
@tool
|
13 |
def extract_metadata_from_url(url: str) -> dict:
|
14 |
"""Extrae todos los metadatos de una página web.
|
@@ -78,7 +112,7 @@ with open("prompts.yaml", 'r') as stream:
|
|
78 |
|
79 |
agent = CodeAgent(
|
80 |
model=model,
|
81 |
-
tools=[final_answer, extract_metadata_from_url], ## add your tools here (don't remove final answer)
|
82 |
max_steps=6,
|
83 |
verbosity_level=1,
|
84 |
grammar=None,
|
|
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
11 |
# Below is an example of a tool that does nothing. Amaze us with your creativity !
|
12 |
+
@tool
def scrape_webpage(url: str, tag: str = "p", class_name: str = None) -> dict:
    """Extract content from a web page by HTML tag and optional CSS class.

    Args:
        url: URL of the page to scrape.
        tag: HTML tag to extract (defaults to <p>).
        class_name: Optional CSS class used to filter the results.

    Returns:
        A dictionary with the keys "url" and "scraped_data" (the stripped text
        of at most the first 20 matching elements) on success, or a dictionary
        with a single "error" key describing what went wrong.
    """
    # Cap on how many extracted strings are returned to the caller.
    max_results = 20
    # Seconds to wait for the server; without a timeout requests.get can
    # block indefinitely on an unresponsive host.
    request_timeout = 10

    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=request_timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Only pass the class filter when one was actually requested.
        if class_name:
            elements = soup.find_all(tag, class_=class_name)
        else:
            elements = soup.find_all(tag)

        extracted_data = [element.get_text(strip=True) for element in elements]

        return {"url": url, "scraped_data": extracted_data[:max_results]}

    except requests.exceptions.RequestException as e:
        # Network, HTTP-status and timeout failures all derive from
        # RequestException, so they are reported through this branch.
        return {"error": f"Error al acceder a la URL: {str(e)}"}
    except Exception as e:
        # Tool boundary: report any other failure as data instead of raising.
        return {"error": f"Error inesperado: {str(e)}"}
|
44 |
+
|
45 |
+
|
46 |
@tool
|
47 |
def extract_metadata_from_url(url: str) -> dict:
|
48 |
"""Extrae todos los metadatos de una página web.
|
|
|
112 |
|
113 |
agent = CodeAgent(
|
114 |
model=model,
|
115 |
+
tools=[final_answer, extract_metadata_from_url, scrape_webpage], ## add your tools here (don't remove final answer)
|
116 |
max_steps=6,
|
117 |
verbosity_level=1,
|
118 |
grammar=None,
|