benjosaur committed on
Commit
5620621
·
1 Parent(s): 546cf90

Fix both system prompts

Browse files
Files changed (3) hide show
  1. search.py +2 -3
  2. tools.py +34 -25
  3. utils.py +4 -2
search.py CHANGED
@@ -44,11 +44,10 @@ class GoogleSearch:
44
  else:
45
  return f"Search failed with status {response.status}"
46
 
47
- async def google_image_search(self, query: str, num_results: int = 5) -> str:
48
  """
49
  Args:
50
  query: Search query
51
- num_results: Max results to return
52
  Returns:
53
  dict: JSON response from Google API.
54
  """
@@ -71,7 +70,7 @@ class GoogleSearch:
71
  results = "Web Search results:\n\n" + "\n\n".join(
72
  [
73
  f"Link:{result['link']}\nTitle:{result['title']}"
74
- for result in data["items"][:num_results]
75
  ]
76
  )
77
  return results
 
44
  else:
45
  return f"Search failed with status {response.status}"
46
 
47
+ async def google_image_search(self, query: str) -> str:
48
  """
49
  Args:
50
  query: Search query
 
51
  Returns:
52
  dict: JSON response from Google API.
53
  """
 
70
  results = "Web Search results:\n\n" + "\n\n".join(
71
  [
72
  f"Link:{result['link']}\nTitle:{result['title']}"
73
+ for result in data["items"][:4]
74
  ]
75
  )
76
  return results
tools.py CHANGED
@@ -61,39 +61,48 @@ def truncate_content(content: str, max_length: int = 10000) -> str:
61
  return content[:max_length]
62
 
63
 
64
- def transcribe_webpage(website_url: str) -> str:
65
- """Visits website url and returns markdown of contents"""
66
- try:
67
- # Send a GET request to the URL with a 20-second timeout
68
- response = requests.get(website_url, timeout=20)
69
- response.raise_for_status() # Raise an exception for bad status codes
 
 
 
 
 
 
 
 
 
70
 
71
- soup = BeautifulSoup(response.text, "html.parser")
72
- content_div = soup.find("div", id="mw-content-text")
73
 
74
- if not content_div:
75
- content_div = soup.find("div")
76
 
77
- # Only extract <p> and <table> tags
78
- elements = content_div.find_all(["p", "table"])
79
 
80
- # Join selected HTML chunks
81
- html_subset = "".join(str(el) for el in elements)
82
 
83
- # Convert the HTML content to Markdown
84
- markdown_content = html2text.HTML2Text().handle(str(html_subset))
85
 
86
- # Remove multiple line breaks
87
- markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
88
 
89
- return truncate_content(markdown_content, 20000)
90
 
91
- except requests.exceptions.Timeout:
92
- return "The request timed out. Please try again later or check the URL."
93
- except RequestException as e:
94
- return f"Error fetching the webpage: {str(e)}"
95
- except Exception as e:
96
- return f"An unexpected error occurred: {str(e)}"
97
 
98
 
99
  def parse_youtube_video(youtube_url: str) -> str:
 
61
  return content[:max_length]
62
 
63
 
64
+ class WebPageTranscription:
65
+ def __init__(self):
66
+ self.counter = 0
67
+
68
+ def transcribe_webpage(self, website_url: str) -> str:
69
+ """Visits website url and returns markdown of contents
70
+ Args:
71
+ website_url:str"""
72
+ if self.counter > 1:
73
+ return "No more transcriptions, move on"
74
+ self.counter += 1
75
+ try:
76
+ # Send a GET request to the URL with a 20-second timeout
77
+ response = requests.get(website_url, timeout=20)
78
+ response.raise_for_status() # Raise an exception for bad status codes
79
 
80
+ soup = BeautifulSoup(response.text, "html.parser")
81
+ content_div = soup.find("div", id="mw-content-text")
82
 
83
+ if not content_div:
84
+ content_div = soup.find("div")
85
 
86
+ # Only extract <p> and <table> tags
87
+ elements = content_div.find_all(["p", "table"])
88
 
89
+ # Join selected HTML chunks
90
+ html_subset = "".join(str(el) for el in elements)
91
 
92
+ # Convert the HTML content to Markdown
93
+ markdown_content = html2text.HTML2Text().handle(str(html_subset))
94
 
95
+ # Remove multiple line breaks
96
+ markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
97
 
98
+ return truncate_content(markdown_content, 20000)
99
 
100
+ except requests.exceptions.Timeout:
101
+ return "The request timed out. Please try again later or check the URL."
102
+ except RequestException as e:
103
+ return f"Error fetching the webpage: {str(e)}"
104
+ except Exception as e:
105
+ return f"An unexpected error occurred: {str(e)}"
106
 
107
 
108
  def parse_youtube_video(youtube_url: str) -> str:
utils.py CHANGED
@@ -61,12 +61,14 @@ def format_final_answer(question: str, answer: str) -> str:
61
  messages=[
62
  {
63
  "role": "system",
64
- "content": """ You're tasked with reformatting an answer from an unreliable AI into the expected format as per their instructions.
65
  <instructions>
66
- You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
67
  </instructions>
 
68
  <question>"""
69
  + question
 
70
  + """
71
  Now here is their answer. Only reply with the corrected formatting
72
  """,
 
61
  messages=[
62
  {
63
  "role": "system",
64
+ "content": """ You're tasked with correcting/reformatting an answer from an unreliable AI into the expected format as per their instructions.
65
  <instructions>
66
+ You are a general AI assistant. I will ask you a question. Your answer should only be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
67
  </instructions>
68
+ For context, here is the question the AI answered
69
  <question>"""
70
  + question
71
+ + """</question>"""
72
  + """
73
  Now here is their answer. Only reply with the corrected formatting
74
  """,