KingNish committed
Commit 202f621 · verified · 1 Parent(s): 7020310

Update app.py

Files changed (1)
  1. app.py +24 -40
app.py CHANGED
@@ -44,58 +44,42 @@ def extract_text_from_webpage(html_content):
     visible_text = soup.get_text(strip=True)
     return visible_text
 
-def search(term, num_results=1, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
+def search(term, num_results=3, timeout=5, ssl_verify=None):
     """Performs a Google search and returns the results."""
     escaped_term = urllib.parse.quote_plus(term)
-    start = 0
     all_results = []
-
-    # Fetch results in batches
-    while start < num_results:
-        resp = requests.get(
+    resp = requests.get(
         url="https://www.google.com/search",
         headers={"User-Agent": get_useragent()},  # Set random user agent
         params={
            "q": term,
-           "num": num_results - start,  # Number of results to fetch in this batch
-           "hl": lang,
-           "start": start,
-           "safe": safe,
+           "num": num_results,
+           "udm": 14,
        },
        timeout=timeout,
        verify=ssl_verify,
    )
-        resp.raise_for_status()  # Raise an exception if request fails
-
-        soup = BeautifulSoup(resp.text, "html.parser")
-        result_block = soup.find_all("div", attrs={"class": "g"})
-
-        # If no results, continue to the next batch
-        if not result_block:
-            start += 1
-            continue
-
-        # Extract link and text from each result
-        for result in result_block:
-            link = result.find("a", href=True)
-            if link:
-                link = link["href"]
-                try:
+    resp.raise_for_status()  # Raise an exception if request fails
+    soup = BeautifulSoup(resp.text, "html.parser")
+    result_block = soup.find_all("div", attrs={"class": "g"})
+    for result in result_block:
+        link = result.find("a", href=True)
+        if link:
+            link = link["href"]
+            try:
                 # Fetch webpage content
-                    webpage = requests.get(link, headers={"User-Agent": get_useragent()})
-                    webpage.raise_for_status()
+                webpage = requests.get(link, headers={"User-Agent": get_useragent()})
+                webpage.raise_for_status()
                 # Extract visible text from webpage
-                    visible_text = extract_text_from_webpage(webpage.text)
-                    all_results.append({"link": link, "text": visible_text})
-                except requests.exceptions.RequestException as e:
+                visible_text = extract_text_from_webpage(webpage.text)
+                all_results.append({"link": link, "text": visible_text})
+            except requests.exceptions.RequestException as e:
                 # Handle errors fetching or processing webpage
-                    print(f"Error fetching or processing {link}: {e}")
-                    all_results.append({"link": link, "text": None})
+                print(f"Error fetching or processing {link}: {e}")
+                all_results.append({"link": link, "text": None})
         else:
-            all_results.append({"link": None, "text": None})
-
-        start += len(result_block)  # Update starting index for next batch
-
+            all_results.append({"link": None, "text": None})
+    print(all_results)
     return all_results
 
 # Speech Recognition Model Configuration
@@ -108,7 +92,7 @@ encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfold
 tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
 
 # Mistral Model Configuration
-client1 = InferenceClient("google/gemma-1.1-7b-it")
+client1 = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
 system_instructions1 = "<s>[SYSTEM] Answer as OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses. The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
 
 def resample(audio_fp32, sr):
@@ -143,11 +127,11 @@ def model(text, web_search):
         web_results = search(text)
         web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
         formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[OpenGPT 4o]"
-        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=300, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
     else:
         formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=300, stream=True, details=True, return_full_text=False)
         return "".join([response.token.text for response in stream if response.token.text != "</s>"])
 
 async def respond(audio, web_search):
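For reference, a minimal sketch of how the rewritten search() can be exercised on its own. It assumes the code runs inside app.py, where requests, BeautifulSoup, get_useragent and extract_text_from_webpage are already defined; the query string is invented for illustration, and udm=14 appears to ask Google for its plain "Web" results view rather than the default mixed results page.

results = search("onnx speech recognition demo", num_results=3)
for res in results:
    print(res["link"])
    # res["text"] is None when a result page could not be fetched
    print((res["text"] or "")[:200])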
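And a minimal sketch of the streaming call pattern model() relies on after this commit, with the swapped-in Mistral endpoint and the reduced token budget. The prompt string is made up for illustration (the real code prepends system_instructions1), and running it outside the Space assumes huggingface_hub is installed and Inference API access is available.

from huggingface_hub import InferenceClient

client1 = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
stream = client1.text_generation(
    "Say hello in one short sentence. [OpenGPT 4o]",  # illustrative prompt only
    max_new_tokens=300,   # lowered from 512 in this commit
    stream=True,
    details=True,         # each streamed chunk exposes .token.text
    return_full_text=False,
)
reply = "".join(chunk.token.text for chunk in stream if chunk.token.text != "</s>")
print(reply)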