Spaces:

KingNish
/

Voice-Chat-AI

Running

App Files Files Community

KingNish committed on Jul 11, 2024

Commit

202f621

verified ·

1 Parent(s): 7020310

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -40

app.py CHANGED Viewed

@@ -44,58 +44,42 @@ def extract_text_from_webpage(html_content):
     visible_text = soup.get_text(strip=True)
     return visible_text
-def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
     """Performs a Google search and returns the results."""
     escaped_term = urllib.parse.quote_plus(term)
-    start = 0
     all_results = []
-    # Fetch results in batches
-    while start < num_results:
-        resp = requests.get(
             url="https://www.google.com/search",
             headers={"User-Agent": get_useragent()}, # Set random user agent
             params={
                 "q": term,
-                "num": num_results - start, # Number of results to fetch in this batch
-                "hl": lang,
-                "start": start,
-                "safe": safe,
             },
             timeout=timeout,
             verify=ssl_verify,
         )
-        resp.raise_for_status() # Raise an exception if request fails
-        soup = BeautifulSoup(resp.text, "html.parser")
-        result_block = soup.find_all("div", attrs={"class": "g"})
-        # If no results, continue to the next batch
-        if not result_block:
-            start += 1
-            continue
-        # Extract link and text from each result
-        for result in result_block:
-            link = result.find("a", href=True)
-            if link:
-                link = link["href"]
-                try:
                     # Fetch webpage content
-                    webpage = requests.get(link, headers={"User-Agent": get_useragent()})
-                    webpage.raise_for_status()
                     # Extract visible text from webpage
-                    visible_text = extract_text_from_webpage(webpage.text)
-                    all_results.append({"link": link, "text": visible_text})
-                except requests.exceptions.RequestException as e:
                     # Handle errors fetching or processing webpage
-                    print(f"Error fetching or processing {link}: {e}")
-                    all_results.append({"link": link, "text": None})
             else:
-                all_results.append({"link": None, "text": None})
-        start += len(result_block) # Update starting index for next batch
     return all_results
 # Speech Recognition Model Configuration
@@ -108,7 +92,7 @@ encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfold
 tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
 # Mistral Model Configuration
-client1 = InferenceClient("google/gemma-1.1-7b-it")
 system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
 def resample(audio_fp32, sr):
@@ -143,11 +127,11 @@ def model(text, web_search):
         web_results = search(text)
         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
-        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
     else:
         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
 async def respond(audio, web_search):

     visible_text = soup.get_text(strip=True)
     return visible_text
+def search(term, num_results=3, timeout=5, ssl_verify=None):
     """Performs a Google search and returns the results."""
     escaped_term = urllib.parse.quote_plus(term)
     all_results = []
+    resp = requests.get(
             url="https://www.google.com/search",
             headers={"User-Agent": get_useragent()}, # Set random user agent
             params={
                 "q": term,
+                "num": num_results,
+                "udm": 14,
             },
             timeout=timeout,
             verify=ssl_verify,
         )
+    resp.raise_for_status() # Raise an exception if request fails
+    soup = BeautifulSoup(resp.text, "html.parser")
+    result_block = soup.find_all("div", attrs={"class": "g"})
+    for result in result_block:
+        link = result.find("a", href=True)
+        if link:
+            link = link["href"]
+            try:
                     # Fetch webpage content
+                webpage = requests.get(link, headers={"User-Agent": get_useragent()})
+                webpage.raise_for_status()
                     # Extract visible text from webpage
+                visible_text = extract_text_from_webpage(webpage.text)
+                all_results.append({"link": link, "text": visible_text})
+            except requests.exceptions.RequestException as e:
                     # Handle errors fetching or processing webpage
+                print(f"Error fetching or processing {link}: {e}")
+                all_results.append({"link": link, "text": None})
             else:
+            all_results.append({"link": None, "text": None})
+    print(all_results)
     return all_results
 # Speech Recognition Model Configuration
 tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
 # Mistral Model Configuration
+client1 = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
 system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
 def resample(audio_fp32, sr):
         web_results = search(text)
         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=300, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
     else:
         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=300, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
 async def respond(audio, web_search):