deeme committed on
Commit
a90e364
·
verified ·
1 Parent(s): 288fe69

Upload free_ask_internet.py

Browse files
Files changed (1) hide show
  1. free_ask_internet.py +18 -19
free_ask_internet.py CHANGED
@@ -26,19 +26,16 @@ def extract_url_content(url):
26
  return {"url": url, "content": content}
27
 
28
 
29
- def search_web_ref(query: str, lang="zh-CN", debug=False):
 
30
  content_list = []
31
 
32
  try:
 
33
  safe_string = urllib.parse.quote_plus(":all !general " + query)
34
 
35
  searxng_url = os.environ.get('SEARXNG_URL')
36
- params = {
37
- "q": safe_string,
38
- "language": "zh-CN" if lang.startswith("zh") else "en-US",
39
- "time_range": "day" # 限制当天结果
40
- }
41
- response = requests.get(searxng_url, params=params)
42
  response.raise_for_status()
43
  search_results = response.json()
44
 
@@ -46,6 +43,7 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
46
  print("JSON Response:")
47
  pprint(search_results)
48
  pedding_urls = []
 
49
  conv_links = []
50
 
51
  if search_results.get('results'):
@@ -58,15 +56,15 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
58
  if url:
59
  url_parsed = urlparse(url)
60
  domain = url_parsed.netloc
61
- icon_url = url_parsed.scheme + '://' + url_parsed.netloc + '/favicon.ico'
62
  site_name = tldextract.extract(url).domain
63
 
64
  conv_links.append({
65
- 'site_name': site_name,
66
- 'icon_url': icon_url,
67
- 'title': name,
68
- 'url': url,
69
- 'snippet': snippet
70
  })
71
 
72
  results = []
@@ -74,28 +72,29 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
74
 
75
  executor = ThreadPoolExecutor(max_workers=10)
76
  for url in pedding_urls:
77
- futures.append(executor.submit(extract_url_content, url))
78
  try:
79
  for future in futures:
80
  res = future.result(timeout=5)
81
  results.append(res)
82
  except concurrent.futures.TimeoutError:
83
  print("任务执行超时")
84
- executor.shutdown(wait=False, cancel_futures=True)
85
 
86
  for content in results:
87
  if content and content.get('content'):
 
88
  item_dict = {
89
- "url": content.get('url'),
90
  "content": content.get('content'),
91
- "length": len(content.get('content'))
92
  }
93
  content_list.append(item_dict)
94
  if debug:
95
  print("URL: {}".format(url))
96
  print("=================")
97
 
98
- return content_list
99
  except Exception as ex:
100
  raise ex
101
 
@@ -214,7 +213,7 @@ def chat(prompt, model: str, stream=True, debug=False):
214
  print(total_content)
215
 
216
  def ask_internet(query: str, model: str, debug=False):
217
- content_list = search_web_ref(query, lang="en-US", debug=debug) # 确保使用英文
218
  if debug:
219
  print(content_list)
220
  prompt = gen_prompt(query, content_list, context_length_limit=6000, debug=debug)
 
26
  return {"url": url, "content": content}
27
 
28
 
29
+ def search_web_ref(query:str, debug=False):
30
+
31
  content_list = []
32
 
33
  try:
34
+
35
  safe_string = urllib.parse.quote_plus(":all !general " + query)
36
 
37
  searxng_url = os.environ.get('SEARXNG_URL')
38
+ response = requests.get(searxng_url + '?q=' + safe_string + '&format=json')
 
 
 
 
 
39
  response.raise_for_status()
40
  search_results = response.json()
41
 
 
43
  print("JSON Response:")
44
  pprint(search_results)
45
  pedding_urls = []
46
+
47
  conv_links = []
48
 
49
  if search_results.get('results'):
 
56
  if url:
57
  url_parsed = urlparse(url)
58
  domain = url_parsed.netloc
59
+ icon_url = url_parsed.scheme + '://' + url_parsed.netloc + '/favicon.ico'
60
  site_name = tldextract.extract(url).domain
61
 
62
  conv_links.append({
63
+ 'site_name':site_name,
64
+ 'icon_url':icon_url,
65
+ 'title':name,
66
+ 'url':url,
67
+ 'snippet':snippet
68
  })
69
 
70
  results = []
 
72
 
73
  executor = ThreadPoolExecutor(max_workers=10)
74
  for url in pedding_urls:
75
+ futures.append(executor.submit(extract_url_content,url))
76
  try:
77
  for future in futures:
78
  res = future.result(timeout=5)
79
  results.append(res)
80
  except concurrent.futures.TimeoutError:
81
  print("任务执行超时")
82
+ executor.shutdown(wait=False,cancel_futures=True)
83
 
84
  for content in results:
85
  if content and content.get('content'):
86
+
87
  item_dict = {
88
+ "url":content.get('url'),
89
  "content": content.get('content'),
90
+ "length":len(content.get('content'))
91
  }
92
  content_list.append(item_dict)
93
  if debug:
94
  print("URL: {}".format(url))
95
  print("=================")
96
 
97
+ return content_list
98
  except Exception as ex:
99
  raise ex
100
 
 
213
  print(total_content)
214
 
215
  def ask_internet(query: str, model: str, debug=False):
216
+ content_list = search_web_ref(query,debug=debug)
217
  if debug:
218
  print(content_list)
219
  prompt = gen_prompt(query, content_list, context_length_limit=6000, debug=debug)