benjosaur committed on
Commit
3cb2af5
·
1 Parent(s): 8a99cdd

Add visit webpage tool. Update imports

Browse files
Files changed (3) hide show
  1. app.py +8 -2
  2. requirements.txt +4 -1
  3. tools.py +34 -0
app.py CHANGED
@@ -3,9 +3,14 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, AgentStream
7
  from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
8
- from tools import APIProcessor, parse_youtube_video, transcribe_image_from_link
 
 
 
 
 
9
  from search import GoogleSearch
10
  from dotenv import load_dotenv
11
 
@@ -40,6 +45,7 @@ class BasicAgent:
40
  get_and_process_question_attachment,
41
  parse_youtube_video,
42
  transcribe_image_from_link,
 
43
  ],
44
  llm=self.llm,
45
  system_prompt=SYSTEM_PROMPT,
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from llama_index.core.agent.workflow import AgentWorkflow
7
  from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
8
+ from tools import (
9
+ APIProcessor,
10
+ parse_youtube_video,
11
+ transcribe_image_from_link,
12
+ transcribe_webpage,
13
+ )
14
  from search import GoogleSearch
15
  from dotenv import load_dotenv
16
 
 
45
  get_and_process_question_attachment,
46
  parse_youtube_video,
47
  transcribe_image_from_link,
48
+ transcribe_webpage,
49
  ],
50
  llm=self.llm,
51
  system_prompt=SYSTEM_PROMPT,
requirements.txt CHANGED
@@ -6,4 +6,7 @@ openai
6
  pandas
7
  aiohttp
8
  Pillow
9
- yt-dlp
 
 
 
 
6
  pandas
7
  aiohttp
8
  Pillow
9
+ yt-dlp
10
+ markdownify
11
+ llama-index-utils-workflow
12
+ llama-index-llms-huggingface-api
tools.py CHANGED
@@ -7,6 +7,9 @@ import tempfile
7
  import os
8
  import io
9
  import yt_dlp
 
 
 
10
 
11
 
12
  def transcribe_image_from_link(image_link: str) -> str:
@@ -46,6 +49,37 @@ def transcribe_image_from_link(image_link: str) -> str:
46
  return transcribed_text
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def parse_youtube_video(youtube_url: str) -> str:
50
  """Returns text transcript of a youtube video
51
  Args:
 
7
  import os
8
  import io
9
  import yt_dlp
10
+ import re
11
+ from markdownify import markdownify
12
+ from requests.exceptions import RequestException
13
 
14
 
15
  def transcribe_image_from_link(image_link: str) -> str:
 
49
  return transcribed_text
50
 
51
 
52
def _dump_webpage_debug(raw_html: str, markdown_content: str) -> None:
    """Append the raw HTML and its Markdown rendering to ``webpage_content.md``.

    The dump is a debugging aid only, so a failure to write it is logged and
    swallowed instead of being reported to the caller as a fetch failure.

    Args:
        raw_html: The response body exactly as received.
        markdown_content: The cleaned Markdown produced from ``raw_html``.
    """
    # Function-scope import keeps this block self-contained; the rest of the
    # module is not known to import ``logging``.
    import logging

    try:
        # Explicit UTF-8: the platform default encoding (e.g. cp1252) cannot
        # represent arbitrary web text and would raise UnicodeEncodeError.
        with open("webpage_content.md", "a", encoding="utf-8") as f:
            f.write("Response from the webpage:\n\n")
            f.write(raw_html)
            f.write("\n\nMarkdown content:\n\n")
            f.write(markdown_content)
    except (OSError, UnicodeError) as e:
        logging.getLogger(__name__).warning(
            "Could not write webpage_content.md: %s", e
        )


def transcribe_webpage(website_url: str) -> str:
    """Visits website url and returns markdown of contents

    Args:
        website_url: Address of the page to fetch with an HTTP GET request.

    Returns:
        The page converted to Markdown, with surrounding whitespace stripped
        and runs of three or more newlines collapsed to a single blank line.
        On failure a human-readable error message is returned instead; this
        function does not raise.
    """
    try:
        # 20-second timeout so a stalled server cannot hang the caller.
        response = requests.get(website_url, timeout=20)
        response.raise_for_status()  # Treat 4xx/5xx responses as errors.

        markdown_content = markdownify(response.text).strip()
        # Collapse excess blank lines left behind by the HTML conversion.
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
    except requests.exceptions.Timeout:
        return "The request timed out. Please try again later or check the URL."
    except RequestException as e:
        return f"Error fetching the webpage: {e}"
    except Exception as e:
        # Kept broad on purpose: the caller expects a string, never a raise.
        return f"An unexpected error occurred: {e}"

    # Outside the try block so a failed debug dump cannot mask a good fetch.
    _dump_webpage_debug(response.text, markdown_content)
    return markdown_content
81
+
82
+
83
  def parse_youtube_video(youtube_url: str) -> str:
84
  """Returns text transcript of a youtube video
85
  Args: