KingNish committed on
Commit
5b3a290
·
verified ·
1 Parent(s): 9fe16b6

Update chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +20 -44
chatbot.py CHANGED
@@ -236,54 +236,30 @@ def extract_text_from_webpage(html_content):
236
  visible_text = soup.get_text(strip=True)
237
  return visible_text
238
 
 
 
239
  # Perform a Google search and return the results
240
- def search(term, num_results=2, lang="en", advanced=True, timeout=5, safe="active", ssl_verify=None):
241
- """Performs a Google search and returns the results."""
242
- escaped_term = urllib.parse.quote_plus(term)
243
- start = 0
244
  all_results = []
245
  # Limit the number of characters from each webpage to stay under the token limit
246
- max_chars_per_page = 8000 # Adjust this value based on your token limit and average webpage length
247
-
248
- with requests.Session() as session:
249
- while start < num_results:
250
- resp = session.get(
251
- url="https://www.google.com/search",
252
- headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
253
- params={
254
- "q": term,
255
- "num": num_results - start,
256
- "hl": lang,
257
- "start": start,
258
- "safe": safe,
259
- },
260
- timeout=timeout,
261
- verify=ssl_verify,
262
- )
263
- resp.raise_for_status()
264
- soup = BeautifulSoup(resp.text, "html.parser")
265
- result_block = soup.find_all("div", attrs={"class": "g"})
266
- if not result_block:
267
- start += 1
268
- continue
269
- for result in result_block:
270
- link = result.find("a", href=True)
271
- if link:
272
- link = link["href"]
273
- try:
274
- webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
275
- webpage.raise_for_status()
276
- visible_text = extract_text_from_webpage(webpage.text)
277
  # Truncate text if it's too long
278
- if len(visible_text) > max_chars_per_page:
279
- visible_text = visible_text[:max_chars_per_page] + "..."
280
- all_results.append({"link": link, "text": visible_text})
281
- except requests.exceptions.RequestException as e:
282
- print(f"Error fetching or processing {link}: {e}")
283
- all_results.append({"link": link, "text": None})
284
- else:
285
- all_results.append({"link": None, "text": None})
286
- start += len(result_block)
287
  return all_results
288
 
289
  # Format the prompt for the language model
 
236
  visible_text = soup.get_text(strip=True)
237
  return visible_text
238
 
239
+ from duckduckgo_search import DDGS
240
+
241
  # Perform a Google search and return the results
242
+ def search(term):
 
 
 
243
  all_results = []
244
  # Limit the number of characters from each webpage to stay under the token limit
245
+ max_chars_per_page = 8000 # Adjust this value based on your token limit and average webpage length
246
+ result_block = DDGS().text(term, max_results=2)
247
+ for result in result_block:
248
+ if 'href' in result:
249
+ link = result["href"]
250
+ try:
251
+ webpage = requests.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
252
+ webpage.raise_for_status()
253
+ visible_text = extract_text_from_webpage(webpage.text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  # Truncate text if it's too long
255
+ if len(visible_text) > max_chars_per_page:
256
+ visible_text = visible_text[:max_chars_per_page] + "..."
257
+ all_results.append({"link": link, "text": visible_text})
258
+ except requests.exceptions.RequestException as e:
259
+ print(f"Error fetching or processing {link}: {e}")
260
+ all_results.append({"link": link, "text": None})
261
+ else:
262
+ all_results.append({"link": None, "text": None})
 
263
  return all_results
264
 
265
  # Format the prompt for the language model