MZhaovo committed on
Commit
be28103
·
1 Parent(s): 3fe8fc4

针对搜索&索引功能增加语言选择

Browse files
Files changed (4) hide show
  1. ChuanhuChatbot.py +10 -0
  2. chat_func.py +10 -2
  3. llama_func.py +5 -2
  4. presets.py +11 -3
ChuanhuChatbot.py CHANGED
@@ -170,6 +170,12 @@ with gr.Blocks(
170
  label="实时传输回答", value=True, visible=enable_streaming_option
171
  )
172
  use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
 
 
 
 
 
 
173
  index_files = gr.Files(label="上传索引文件", type="file", multiple=True)
174
 
175
  with gr.Tab(label="Prompt"):
@@ -293,6 +299,7 @@ with gr.Blocks(
293
  model_select_dropdown,
294
  use_websearch_checkbox,
295
  index_files,
 
296
  ],
297
  [chatbot, history, status_display, token_count],
298
  show_progress=True,
@@ -315,6 +322,7 @@ with gr.Blocks(
315
  model_select_dropdown,
316
  use_websearch_checkbox,
317
  index_files,
 
318
  ],
319
  [chatbot, history, status_display, token_count],
320
  show_progress=True,
@@ -339,6 +347,7 @@ with gr.Blocks(
339
  temperature,
340
  use_streaming_checkbox,
341
  model_select_dropdown,
 
342
  ],
343
  [chatbot, history, status_display, token_count],
344
  show_progress=True,
@@ -363,6 +372,7 @@ with gr.Blocks(
363
  temperature,
364
  gr.State(0),
365
  model_select_dropdown,
 
366
  ],
367
  [chatbot, history, status_display, token_count],
368
  show_progress=True,
 
170
  label="实时传输回答", value=True, visible=enable_streaming_option
171
  )
172
  use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
173
+ language_select_dropdown = gr.Dropdown(
174
+ label="选择回复语言(针对搜索&索引功能)",
175
+ choices=REPLY_LANGUAGES,
176
+ multiselect=False,
177
+ value=REPLY_LANGUAGES[0]
178
+ )
179
  index_files = gr.Files(label="上传索引文件", type="file", multiple=True)
180
 
181
  with gr.Tab(label="Prompt"):
 
299
  model_select_dropdown,
300
  use_websearch_checkbox,
301
  index_files,
302
+ language_select_dropdown,
303
  ],
304
  [chatbot, history, status_display, token_count],
305
  show_progress=True,
 
322
  model_select_dropdown,
323
  use_websearch_checkbox,
324
  index_files,
325
+ language_select_dropdown,
326
  ],
327
  [chatbot, history, status_display, token_count],
328
  show_progress=True,
 
347
  temperature,
348
  use_streaming_checkbox,
349
  model_select_dropdown,
350
+ language_select_dropdown,
351
  ],
352
  [chatbot, history, status_display, token_count],
353
  show_progress=True,
 
372
  temperature,
373
  gr.State(0),
374
  model_select_dropdown,
375
+ language_select_dropdown,
376
  ],
377
  [chatbot, history, status_display, token_count],
378
  show_progress=True,
chat_func.py CHANGED
@@ -262,9 +262,12 @@ def predict(
262
  selected_model=MODELS[0],
263
  use_websearch=False,
264
  files = None,
 
265
  should_check_token_count=True,
266
  ): # repetition_penalty, top_k
267
  logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
 
 
268
  if files:
269
  msg = "构建索引中……(这可能需要比较久的时间)"
270
  logging.info(msg)
@@ -272,7 +275,7 @@ def predict(
272
  index = construct_index(openai_api_key, file_src=files)
273
  msg = "索引构建完成,获取回答中……"
274
  yield chatbot, history, msg, all_token_counts
275
- history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot)
276
  yield chatbot, history, status_text, all_token_counts
277
  return
278
 
@@ -292,6 +295,7 @@ def predict(
292
  replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
293
  .replace("{query}", inputs)
294
  .replace("{web_results}", "\n\n".join(web_results))
 
295
  )
296
  else:
297
  link_references = ""
@@ -389,6 +393,7 @@ def retry(
389
  temperature,
390
  stream=False,
391
  selected_model=MODELS[0],
 
392
  ):
393
  logging.info("重试中……")
394
  if len(history) == 0:
@@ -408,6 +413,7 @@ def retry(
408
  temperature,
409
  stream=stream,
410
  selected_model=selected_model,
 
411
  )
412
  logging.info("重试中……")
413
  for x in iter:
@@ -425,6 +431,7 @@ def reduce_token_size(
425
  temperature,
426
  max_token_count,
427
  selected_model=MODELS[0],
 
428
  ):
429
  logging.info("开始减少token数量……")
430
  iter = predict(
@@ -438,6 +445,7 @@ def reduce_token_size(
438
  temperature,
439
  selected_model=selected_model,
440
  should_check_token_count=False,
 
441
  )
442
  logging.info(f"chatbot: {chatbot}")
443
  flag = False
@@ -453,4 +461,4 @@ def reduce_token_size(
453
  sum(token_count) if len(token_count) > 0 else 0,
454
  ), token_count
455
  logging.info(msg)
456
- logging.info("减少token数量完毕")
 
262
  selected_model=MODELS[0],
263
  use_websearch=False,
264
  files = None,
265
+ reply_language="中文",
266
  should_check_token_count=True,
267
  ): # repetition_penalty, top_k
268
  logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
269
+ if reply_language == "跟随问题语言(不稳定)":
270
+ reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
271
  if files:
272
  msg = "构建索引中……(这可能需要比较久的时间)"
273
  logging.info(msg)
 
275
  index = construct_index(openai_api_key, file_src=files)
276
  msg = "索引构建完成,获取回答中……"
277
  yield chatbot, history, msg, all_token_counts
278
+ history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot, reply_language)
279
  yield chatbot, history, status_text, all_token_counts
280
  return
281
 
 
295
  replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
296
  .replace("{query}", inputs)
297
  .replace("{web_results}", "\n\n".join(web_results))
298
+ .replace("{reply_language}", reply_language )
299
  )
300
  else:
301
  link_references = ""
 
393
  temperature,
394
  stream=False,
395
  selected_model=MODELS[0],
396
+ reply_language="中文",
397
  ):
398
  logging.info("重试中……")
399
  if len(history) == 0:
 
413
  temperature,
414
  stream=stream,
415
  selected_model=selected_model,
416
+ reply_language=reply_language,
417
  )
418
  logging.info("重试中……")
419
  for x in iter:
 
431
  temperature,
432
  max_token_count,
433
  selected_model=MODELS[0],
434
+ reply_language="中文",
435
  ):
436
  logging.info("开始减少token数量……")
437
  iter = predict(
 
445
  temperature,
446
  selected_model=selected_model,
447
  should_check_token_count=False,
448
+ reply_language=reply_language,
449
  )
450
  logging.info(f"chatbot: {chatbot}")
451
  flag = False
 
461
  sum(token_count) if len(token_count) > 0 else 0,
462
  ), token_count
463
  logging.info(msg)
464
+ logging.info("减少token数量完毕")
llama_func.py CHANGED
@@ -102,6 +102,7 @@ def chat_ai(
102
  question,
103
  context,
104
  chatbot,
 
105
  ):
106
  os.environ["OPENAI_API_KEY"] = api_key
107
 
@@ -116,6 +117,7 @@ def chat_ai(
116
  SIM_K,
117
  INDEX_QUERY_TEMPRATURE,
118
  context,
 
119
  )
120
  if response is None:
121
  status_text = "查询失败,请换个问法试试"
@@ -139,6 +141,7 @@ def ask_ai(
139
  sim_k=1,
140
  temprature=0,
141
  prefix_messages=[],
 
142
  ):
143
  os.environ["OPENAI_API_KEY"] = api_key
144
 
@@ -153,8 +156,8 @@ def ask_ai(
153
  )
154
 
155
  response = None # Initialize response variable to avoid UnboundLocalError
156
- qa_prompt = QuestionAnswerPrompt(prompt_tmpl)
157
- rf_prompt = RefinePrompt(refine_tmpl)
158
  response = index.query(
159
  question,
160
  llm_predictor=llm_predictor,
 
102
  question,
103
  context,
104
  chatbot,
105
+ reply_language,
106
  ):
107
  os.environ["OPENAI_API_KEY"] = api_key
108
 
 
117
  SIM_K,
118
  INDEX_QUERY_TEMPRATURE,
119
  context,
120
+ reply_language,
121
  )
122
  if response is None:
123
  status_text = "查询失败,请换个问法试试"
 
141
  sim_k=1,
142
  temprature=0,
143
  prefix_messages=[],
144
+ reply_language="中文",
145
  ):
146
  os.environ["OPENAI_API_KEY"] = api_key
147
 
 
156
  )
157
 
158
  response = None # Initialize response variable to avoid UnboundLocalError
159
+ qa_prompt = QuestionAnswerPrompt(prompt_tmpl.replace("{reply_language}", reply_language))
160
+ rf_prompt = RefinePrompt(refine_tmpl.replace("{reply_language}", reply_language))
161
  response = index.query(
162
  question,
163
  llm_predictor=llm_predictor,
presets.py CHANGED
@@ -49,6 +49,13 @@ MODELS = [
49
  "gpt-4-32k-0314",
50
  ] # 可选的模型
51
 
 
 
 
 
 
 
 
52
 
53
  WEBSEARCH_PTOMPT_TEMPLATE = """\
54
  Web search results:
@@ -58,7 +65,8 @@ Current date: {current_date}
58
 
59
  Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
60
  Query: {query}
61
- Reply in 中文"""
 
62
 
63
  PROMPT_TEMPLATE = """\
64
  Context information is below.
@@ -71,7 +79,7 @@ Make sure to cite results using [number] notation after the reference.
71
  If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
72
  Use prior knowledge only if the given context didn't provide enough information.
73
  Answer the question: {query_str}
74
- Reply in 中文
75
  """
76
 
77
  REFINE_TEMPLATE = """\
@@ -83,6 +91,6 @@ We have the opportunity to refine the existing answer
83
  {context_msg}
84
  ------------
85
  Given the new context, refine the original answer to better
86
- Answer in the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch.
87
  If the context isn't useful, return the original answer.
88
  """
 
49
  "gpt-4-32k-0314",
50
  ] # 可选的模型
51
 
52
+ REPLY_LANGUAGES = [
53
+ "中文",
54
+ "English",
55
+ "日本語",
56
+ "跟随问题语言(不稳定)"
57
+ ]
58
+
59
 
60
  WEBSEARCH_PTOMPT_TEMPLATE = """\
61
  Web search results:
 
65
 
66
  Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
67
  Query: {query}
68
+ Reply in {reply_language}
69
+ """
70
 
71
  PROMPT_TEMPLATE = """\
72
  Context information is below.
 
79
  If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
80
  Use prior knowledge only if the given context didn't provide enough information.
81
  Answer the question: {query_str}
82
+ Reply in {reply_language}
83
  """
84
 
85
  REFINE_TEMPLATE = """\
 
91
  {context_msg}
92
  ------------
93
  Given the new context, refine the original answer to better
94
+ Reply in {reply_language}
95
  If the context isn't useful, return the original answer.
96
  """