Upload free_ask_internet.py
free_ask_internet.py  CHANGED  (+18 -19)
@@ -26,19 +26,16 @@ def extract_url_content(url):
     return {"url": url, "content": content}
 
 
-def search_web_ref(query: str, lang="zh-CN", debug=False):
+def search_web_ref(query:str, debug=False):
+
     content_list = []
 
     try:
+
         safe_string = urllib.parse.quote_plus(":all !general " + query)
 
         searxng_url = os.environ.get('SEARXNG_URL')
-        params = {
-            "q": safe_string,
-            "language": "zh-CN" if lang.startswith("zh") else "en-US",
-            "time_range": "day"  # 限制当天结果 (restrict to results from the current day)
-        }
-        response = requests.get(searxng_url, params=params)
+        response = requests.get(searxng_url + '?q=' + safe_string + '&format=json')
         response.raise_for_status()
         search_results = response.json()
 
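In the added request line, the query is percent-encoded with quote_plus and then concatenated onto SEARXNG_URL together with a hardcoded format=json parameter. For comparison, a minimal sketch of the same call that lets requests assemble and encode the query string instead (assuming SEARXNG_URL points at a SearXNG /search endpoint with JSON output enabled; the helper name is made up):

import os
import requests

def searxng_search(query: str) -> dict:
    # Hypothetical helper: query a SearXNG instance and return the parsed JSON.
    searxng_url = os.environ.get("SEARXNG_URL")
    # requests percent-encodes params itself, so the raw query is passed as-is
    # rather than pre-quoting it with urllib.parse.quote_plus.
    response = requests.get(
        searxng_url,
        params={"q": ":all !general " + query, "format": "json"},
        timeout=10,
    )
    response.raise_for_status()
    return response.json()

Note that a SearXNG instance answers format=json only if the JSON format is enabled in its settings; otherwise the request is rejected.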
@@ -46,6 +43,7 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
             print("JSON Response:")
             pprint(search_results)
         pedding_urls = []
+
         conv_links = []
 
         if search_results.get('results'):
@@ -58,15 +56,15 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
                 if url:
                     url_parsed = urlparse(url)
                     domain = url_parsed.netloc
-                    icon_url =
+                    icon_url = url_parsed.scheme + '://' + url_parsed.netloc + '/favicon.ico'
                     site_name = tldextract.extract(url).domain
 
                     conv_links.append({
-                        'site_name':
-                        'icon_url':
-                        'title':
-                        'url':
-                        'snippet':
+                        'site_name':site_name,
+                        'icon_url':icon_url,
+                        'title':name,
+                        'url':url,
+                        'snippet':snippet
                     })
 
         results = []
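The added icon_url line points at the conventional /favicon.ico location on the link's origin, while site_name is the bare registered domain extracted by tldextract. A standalone sketch of that decomposition, using a made-up URL:

from urllib.parse import urlparse

import tldextract  # third-party; downloads the public suffix list on first use

url = "https://news.example.co.uk/articles/42"
parts = urlparse(url)

# scheme + netloc give the site origin, where /favicon.ico conventionally lives
icon_url = parts.scheme + "://" + parts.netloc + "/favicon.ico"
site_name = tldextract.extract(url).domain

print(icon_url)   # https://news.example.co.uk/favicon.ico
print(site_name)  # example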
@@ -74,28 +72,29 @@ def search_web_ref(query: str, lang="zh-CN", debug=False):
 
         executor = ThreadPoolExecutor(max_workers=10)
         for url in pedding_urls:
-            futures.append(executor.submit(extract_url_content,
+            futures.append(executor.submit(extract_url_content,url))
         try:
             for future in futures:
                 res = future.result(timeout=5)
                 results.append(res)
         except concurrent.futures.TimeoutError:
             print("任务执行超时")  # "task execution timed out"
-            executor.shutdown(wait=False,
+            executor.shutdown(wait=False,cancel_futures=True)
 
         for content in results:
             if content and content.get('content'):
+
                 item_dict = {
-                    "url":
+                    "url":content.get('url'),
                     "content": content.get('content'),
-                    "length":
+                    "length":len(content.get('content'))
                 }
                 content_list.append(item_dict)
             if debug:
                 print("URL: {}".format(url))
                 print("=================")
 
-        return
+        return content_list
     except Exception as ex:
         raise ex
 
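The collection loop above waits at most five seconds per future and, on a timeout, abandons the pool with cancel_futures=True (available since Python 3.9). A self-contained sketch of the same pattern, with a hypothetical slow_fetch standing in for extract_url_content:

import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor

def slow_fetch(url: str) -> str:
    # Stand-in for extract_url_content: pretend one of the URLs is slow.
    time.sleep(6 if "slow" in url else 0.1)
    return "content of " + url

urls = ["https://example.com/fast", "https://example.com/slow"]
results = []
executor = ThreadPoolExecutor(max_workers=10)
futures = [executor.submit(slow_fetch, u) for u in urls]
try:
    for future in futures:
        # Each result call gets its own 5-second deadline, as in the diff above.
        results.append(future.result(timeout=5))
except concurrent.futures.TimeoutError:
    print("task execution timed out")
    # Drop any queued work that has not started yet (Python 3.9+).
    executor.shutdown(wait=False, cancel_futures=True)

print(results)  # only the fast fetch finishes within the deadline

cancel_futures only discards work that has not started; a thread that is already running keeps going in the background, so the per-future timeout is what bounds how long the caller waits.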
@@ -214,7 +213,7 @@ def chat(prompt, model: str, stream=True, debug=False):
         print(total_content)
 
 def ask_internet(query: str, model: str, debug=False):
-    content_list = search_web_ref(query,
+    content_list = search_web_ref(query,debug=debug)
     if debug:
         print(content_list)
     prompt = gen_prompt(query, content_list, context_length_limit=6000, debug=debug)
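search_web_ref returns the content_list that ask_internet then turns into a prompt via gen_prompt. A minimal usage sketch, assuming SEARXNG_URL points at a reachable SearXNG instance and that this file is importable as free_ask_internet:

import os

# Assumed endpoint; adjust to your own SearXNG deployment.
os.environ.setdefault("SEARXNG_URL", "http://127.0.0.1:8080/search")

from free_ask_internet import search_web_ref

refs = search_web_ref("what is retrieval augmented generation", debug=True)
for ref in refs:
    # Each item carries the fetched page text plus its length.
    print(ref["url"], ref["length"])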