zvl commited on
Commit
930fb8e
·
verified ·
1 Parent(s): a3ad57b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -9,6 +9,40 @@ from bs4 import BeautifulSoup
9
  from Gradio_UI import GradioUI
10
 
11
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @tool
13
  def extract_metadata_from_url(url: str) -> dict:
14
  """Extrae todos los metadatos de una página web.
@@ -78,7 +112,7 @@ with open("prompts.yaml", 'r') as stream:
78
 
79
  agent = CodeAgent(
80
  model=model,
81
- tools=[final_answer, extract_metadata_from_url], ## add your tools here (don't remove final answer)
82
  max_steps=6,
83
  verbosity_level=1,
84
  grammar=None,
 
9
  from Gradio_UI import GradioUI
10
 
11
  # Below is an example of a tool that does nothing. Amaze us with your creativity !
12
@tool
def scrape_webpage(url: str, tag: str = "p", class_name: str = None) -> dict:
    """Extract content from a web page by HTML tag and optional CSS class.

    Args:
        url: URL of the page to scrape.
        tag: HTML tag to extract (defaults to <p>).
        class_name: Optional CSS class to filter results.

    Returns:
        A dict with the url and the extracted text under "scraped_data"
        (capped at 20 items), or a dict with an "error" key on failure.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        # timeout prevents the agent step from hanging forever on a slow or
        # unresponsive host (requests has NO timeout by default).
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        if class_name:
            elements = soup.find_all(tag, class_=class_name)
        else:
            elements = soup.find_all(tag)

        extracted_data = [element.get_text(strip=True) for element in elements]

        # Cap at 20 results (the original comment said 10 but the code kept 20;
        # the code's behavior is preserved and the comment corrected).
        return {"url": url, "scraped_data": extracted_data[:20]}

    except requests.exceptions.RequestException as e:
        return {"error": f"Error al acceder a la URL: {str(e)}"}
    except Exception as e:
        return {"error": f"Error inesperado: {str(e)}"}
46
  @tool
47
  def extract_metadata_from_url(url: str) -> dict:
48
  """Extrae todos los metadatos de una página web.
 
112
 
113
  agent = CodeAgent(
114
  model=model,
115
+ tools=[final_answer, extract_metadata_from_url, scrape_webpage], ## add your tools here (don't remove final answer)
116
  max_steps=6,
117
  verbosity_level=1,
118
  grammar=None,