Final_Assignment_Template

Sleeping

App Files Files Community

benjosaur committed on Jun 28

Commit

5620621

1 Parent(s): 546cf90

Fix both system prompts

Browse files

Files changed (3) hide show

search.py +2 -3
tools.py +34 -25
utils.py +4 -2

search.py CHANGED Viewed

@@ -44,11 +44,10 @@ class GoogleSearch:
                 else:
                     return f"Search failed with status {response.status}"
-    async def google_image_search(self, query: str, num_results: int = 5) -> str:
         """
         Args:
             query: Search query
-            num_results: Max results to return
         Returns:
             dict: JSON response from Google API.
         """
@@ -71,7 +70,7 @@ class GoogleSearch:
                     results = "Web Search results:\n\n" + "\n\n".join(
                         [
                             f"Link:{result['link']}\nTitle:{result['title']}"
-                            for result in data["items"][:num_results]
                         ]
                     )
                     return results

                 else:
                     return f"Search failed with status {response.status}"
+    async def google_image_search(self, query: str) -> str:
         """
         Args:
             query: Search query
         Returns:
             dict: JSON response from Google API.
         """
                     results = "Web Search results:\n\n" + "\n\n".join(
                         [
                             f"Link:{result['link']}\nTitle:{result['title']}"
+                            for result in data["items"][:4]
                         ]
                     )
                     return results

tools.py CHANGED Viewed

@@ -61,39 +61,48 @@ def truncate_content(content: str, max_length: int = 10000) -> str:
         return content[:max_length]
-def transcribe_webpage(website_url: str) -> str:
-    """Visits website url and returns markdown of contents"""
-    try:
-        # Send a GET request to the URL with a 20-second timeout
-        response = requests.get(website_url, timeout=20)
-        response.raise_for_status()  # Raise an exception for bad status codes
-        soup = BeautifulSoup(response.text, "html.parser")
-        content_div = soup.find("div", id="mw-content-text")
-        if not content_div:
-            content_div = soup.find("div")
-        # Only extract <p> and <table> tags
-        elements = content_div.find_all(["p", "table"])
-        # Join selected HTML chunks
-        html_subset = "".join(str(el) for el in elements)
-        # Convert the HTML content to Markdown
-        markdown_content = html2text.HTML2Text().handle(str(html_subset))
-        # Remove multiple line breaks
-        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
-        return truncate_content(markdown_content, 20000)
-    except requests.exceptions.Timeout:
-        return "The request timed out. Please try again later or check the URL."
-    except RequestException as e:
-        return f"Error fetching the webpage: {str(e)}"
-    except Exception as e:
-        return f"An unexpected error occurred: {str(e)}"
 def parse_youtube_video(youtube_url: str) -> str:

         return content[:max_length]
+class WebPageTranscription:
+    def __init__(self):
+        self.counter = 0
+    def transcribe_webpage(self, website_url: str) -> str:
+        """Visits website url and returns markdown of contents
+        Args:
+        website_url:str"""
+        if self.counter > 1:
+            return "No more transcriptions, move on"
+        self.counter += 1
+        try:
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(website_url, timeout=20)
+            response.raise_for_status()  # Raise an exception for bad status codes
+            soup = BeautifulSoup(response.text, "html.parser")
+            content_div = soup.find("div", id="mw-content-text")
+            if not content_div:
+                content_div = soup.find("div")
+            # Only extract <p> and <table> tags
+            elements = content_div.find_all(["p", "table"])
+            # Join selected HTML chunks
+            html_subset = "".join(str(el) for el in elements)
+            # Convert the HTML content to Markdown
+            markdown_content = html2text.HTML2Text().handle(str(html_subset))
+            # Remove multiple line breaks
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
+            return truncate_content(markdown_content, 20000)
+        except requests.exceptions.Timeout:
+            return "The request timed out. Please try again later or check the URL."
+        except RequestException as e:
+            return f"Error fetching the webpage: {str(e)}"
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"
 def parse_youtube_video(youtube_url: str) -> str:

utils.py CHANGED Viewed

@@ -61,12 +61,14 @@ def format_final_answer(question: str, answer: str) -> str:
         messages=[
             {
                 "role": "system",
-                "content": """ You're tasked with reformatting an answer from an unreliable AI into the expected format as per their instructions.
             <instructions>
-            You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
             </instructions>
             <question>"""
                 + question
                 + """
             Now here is their answer. Only reply with the corrected formatting
             """,

         messages=[
             {
                 "role": "system",
+                "content": """ You're tasked with correcting/reformatting an answer from an unreliable AI into the expected format as per their instructions.
             <instructions>
+            You are a general AI assistant. I will ask you a question. Your answer should only be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
             </instructions>
+            For context, here is the question the AI answered
             <question>"""
                 + question
+                + """</question>"""
                 + """
             Now here is their answer. Only reply with the corrected formatting
             """,