Add language selection for the search & index features
Files changed:
- ChuanhuChatbot.py +10 -0
- chat_func.py +10 -2
- llama_func.py +5 -2
- presets.py +11 -3
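
In summary: presets.py gains a REPLY_LANGUAGES list and a {reply_language} placeholder in its three prompt templates; ChuanhuChatbot.py adds a language dropdown and appends it to the inputs of four event handlers; chat_func.py threads a new reply_language argument through predict, retry, and reduce_token_size; and llama_func.py fills the placeholder before building the llama-index prompts. The selection only affects answers produced via web search or uploaded-index queries.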
ChuanhuChatbot.py
CHANGED

@@ -170,6 +170,12 @@ with gr.Blocks(
                 label="实时传输回答", value=True, visible=enable_streaming_option
             )
             use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
+            language_select_dropdown = gr.Dropdown(
+                label="选择回复语言(针对搜索&索引功能)",
+                choices=REPLY_LANGUAGES,
+                multiselect=False,
+                value=REPLY_LANGUAGES[0]
+            )
             index_files = gr.Files(label="上传索引文件", type="file", multiple=True)

     with gr.Tab(label="Prompt"):
@@ -293,6 +299,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -315,6 +322,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -339,6 +347,7 @@ with gr.Blocks(
             temperature,
             use_streaming_checkbox,
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -363,6 +372,7 @@ with gr.Blocks(
             temperature,
             gr.State(0),
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
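
All four handler changes follow the same pattern: the dropdown is appended to the handler's input list, and Gradio passes its current value as an extra positional argument. A minimal, self-contained sketch of that pattern, assuming a recent Gradio 3.x (the handler here is a stand-in, not the app's predict):

    import gradio as gr

    REPLY_LANGUAGES = ["中文", "English", "日本語", "跟随问题语言(不稳定)"]

    def answer(question, reply_language):
        # Stand-in for predict(): the real handler threads reply_language
        # into the search/index prompt templates.
        return f"[reply in {reply_language}] {question}"

    with gr.Blocks() as demo:
        language_select_dropdown = gr.Dropdown(
            label="选择回复语言(针对搜索&索引功能)",
            choices=REPLY_LANGUAGES,
            multiselect=False,
            value=REPLY_LANGUAGES[0],
        )
        question = gr.Textbox(label="question")
        reply = gr.Textbox(label="reply")
        # Listing the dropdown among the inputs is all the wiring the UI needs:
        # Gradio reads its current value on every click.
        gr.Button("submit").click(answer, [question, language_select_dropdown], [reply])

    # demo.launch()  # uncomment to run locally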
chat_func.py
CHANGED

@@ -262,9 +262,12 @@ def predict(
     selected_model=MODELS[0],
     use_websearch=False,
     files = None,
+    reply_language="中文",
     should_check_token_count=True,
 ):  # repetition_penalty, top_k
     logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
+    if reply_language == "跟随问题语言(不稳定)":
+        reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
     if files:
         msg = "构建索引中……(这可能需要比较久的时间)"
         logging.info(msg)
@@ -272,7 +275,7 @@ def predict(
         index = construct_index(openai_api_key, file_src=files)
         msg = "索引构建完成,获取回答中……"
         yield chatbot, history, msg, all_token_counts
-        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot)
+        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot, reply_language)
         yield chatbot, history, status_text, all_token_counts
         return

@@ -292,6 +295,7 @@ def predict(
             replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
             .replace("{query}", inputs)
             .replace("{web_results}", "\n\n".join(web_results))
+            .replace("{reply_language}", reply_language )
         )
     else:
         link_references = ""
@@ -389,6 +393,7 @@ def retry(
     temperature,
     stream=False,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("重试中……")
     if len(history) == 0:
@@ -408,6 +413,7 @@ def retry(
         temperature,
         stream=stream,
         selected_model=selected_model,
+        reply_language=reply_language,
     )
     logging.info("重试中……")
     for x in iter:
@@ -425,6 +431,7 @@ def reduce_token_size(
     temperature,
     max_token_count,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("开始减少token数量……")
     iter = predict(
@@ -438,6 +445,7 @@ def reduce_token_size(
         temperature,
         selected_model=selected_model,
         should_check_token_count=False,
+        reply_language=reply_language,
     )
     logging.info(f"chatbot: {chatbot}")
     flag = False
@@ -453,4 +461,4 @@ def reduce_token_size(
         sum(token_count) if len(token_count) > 0 else 0,
     ), token_count
     logging.info(msg)
-    logging.info("减少token数量完毕")
+    logging.info("减少token数量完毕")
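
The only choice that needs preprocessing is 跟随问题语言(不稳定) ("follow the question's language"): predict rewrites it into an English instruction asking the model to mirror the question's language, which relies on the model honoring a soft instruction rather than a fixed target, hence the "unstable" label. A standalone sketch of that branch:

    FOLLOW_QUESTION_LANGUAGE = "跟随问题语言(不稳定)"

    def normalize_reply_language(reply_language: str) -> str:
        # Mirrors the branch added at the top of predict(): fixed choices pass
        # through unchanged; the "follow" option becomes a prose instruction.
        if reply_language == FOLLOW_QUESTION_LANGUAGE:
            return ("the same language as the question, such as English, "
                    "中文, 日本語, Español, Français, or Deutsch.")
        return reply_language

    assert normalize_reply_language("English") == "English"
    assert "same language" in normalize_reply_language(FOLLOW_QUESTION_LANGUAGE)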
llama_func.py
CHANGED

@@ -102,6 +102,7 @@ def chat_ai(
     question,
     context,
     chatbot,
+    reply_language,
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -116,6 +117,7 @@ def chat_ai(
         SIM_K,
         INDEX_QUERY_TEMPRATURE,
         context,
+        reply_language,
     )
     if response is None:
         status_text = "查询失败,请换个问法试试"
@@ -139,6 +141,7 @@ def ask_ai(
     sim_k=1,
     temprature=0,
     prefix_messages=[],
+    reply_language="中文",
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -153,8 +156,8 @@ def ask_ai(
     )

     response = None  # Initialize response variable to avoid UnboundLocalError
-    qa_prompt = QuestionAnswerPrompt(prompt_tmpl)
-    rf_prompt = RefinePrompt(refine_tmpl)
+    qa_prompt = QuestionAnswerPrompt(prompt_tmpl.replace("{reply_language}", reply_language))
+    rf_prompt = RefinePrompt(refine_tmpl.replace("{reply_language}", reply_language))
     response = index.query(
         question,
         llm_predictor=llm_predictor,
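
Note the substitution uses str.replace rather than str.format: the same templates also contain fields such as {query_str} and {context_str} that llama_index fills later, and format would fail on those unfilled fields. A minimal illustration with a cut-down stand-in template (not the real PROMPT_TEMPLATE):

    prompt_tmpl = "Answer the question: {query_str}\nReply in {reply_language}\n"

    def localize(template: str, reply_language: str) -> str:
        # str.format would raise KeyError on the unfilled {query_str} field;
        # a targeted replace only touches the one placeholder we own.
        return template.replace("{reply_language}", reply_language)

    print(localize(prompt_tmpl, "English"))
    # Answer the question: {query_str}
    # Reply in English
    # -> "{query_str}" survives untouched for llama_index to fill later.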
presets.py
CHANGED

@@ -49,6 +49,13 @@ MODELS = [
     "gpt-4-32k-0314",
 ]  # 可选的模型

+REPLY_LANGUAGES = [
+    "中文",
+    "English",
+    "日本語",
+    "跟随问题语言(不稳定)"
+]
+

 WEBSEARCH_PTOMPT_TEMPLATE = """\
 Web search results:
@@ -58,7 +65,8 @@ Current date: {current_date}

 Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
 Query: {query}
-Reply in 中文"""
+Reply in {reply_language}
+"""

 PROMPT_TEMPLATE = """\
 Context information is below.
@@ -71,7 +79,7 @@ Make sure to cite results using [number] notation after the reference.
 If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
 Use prior knowledge only if the given context didn't provide enough information.
 Answer the question: {query_str}
-Reply in 中文
+Reply in {reply_language}
 """

 REFINE_TEMPLATE = """\
@@ -83,6 +91,6 @@ We have the opportunity to refine the existing answer
 {context_msg}
 ------------
 Given the new context, refine the original answer to better
-
+Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
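
As a guard against regressions, one could assert (illustrative, not part of the commit) that each template still carries the placeholder and that the UI default matches the reply_language="中文" defaults used in chat_func.py and llama_func.py:

    from presets import (REPLY_LANGUAGES, WEBSEARCH_PTOMPT_TEMPLATE,
                         PROMPT_TEMPLATE, REFINE_TEMPLATE)

    for tmpl in (WEBSEARCH_PTOMPT_TEMPLATE, PROMPT_TEMPLATE, REFINE_TEMPLATE):
        assert "{reply_language}" in tmpl, "template lost its language placeholder"
    assert REPLY_LANGUAGES[0] == "中文", "UI default drifted from function defaults"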