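"""
Streaming bridge to Alibaba Cloud DashScope Qwen models (qwen-turbo / qwen-plus / qwen-max).
QwenRequestInstance wraps dashscope.Generation and yields the accumulated reply as it streams in.
"""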
from http import HTTPStatus
from toolbox import get_conf
import threading
import logging

timeout_bot_msg = '[Local Message] Request timeout. Network error.'


class QwenRequestInstance():
    def __init__(self):
        import dashscope
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

        def validate_key():
            DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
            if DASHSCOPE_API_KEY == '':
                return False
            return True

        if not validate_key():
            raise RuntimeError('Please configure DASHSCOPE_API_KEY')
        dashscope.api_key = get_conf("DASHSCOPE_API_KEY")

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        # import _thread as thread
        from dashscope import Generation
        QWEN_MODEL = {
            'qwen-turbo': Generation.Models.qwen_turbo,
            'qwen-plus': Generation.Models.qwen_plus,
            'qwen-max': Generation.Models.qwen_max,
        }[llm_kwargs['llm_model']]
        # Keep top_p strictly inside the open interval (0, 1)
        top_p = llm_kwargs.get('top_p', 0.8)
        if top_p == 0: top_p += 1e-5
        if top_p == 1: top_p -= 1e-5
        self.result_buf = ""
        responses = Generation.call(
            model=QWEN_MODEL,
            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=top_p,
            temperature=llm_kwargs.get('temperature', 1.0),
            result_format='message',
            stream=True,
            incremental_output=True,  # each chunk carries only the newly generated text
        )
        for response in responses:
            if response.status_code == HTTPStatus.OK:
                if response.output.choices[0].finish_reason == 'stop':
                    yield self.result_buf
                    break
                elif response.output.choices[0].finish_reason == 'length':
                    self.result_buf += "[Local Message] The generation is too long; the remaining output has been truncated."
                    yield self.result_buf
                    break
                else:
                    # Incremental chunk: append the new fragment and yield the accumulated reply
                    self.result_buf += response.output.choices[0].message.content
                    yield self.result_buf
            else:
                self.result_buf += f"[Local Message] Request error: status code {response.status_code}, error code {response.code}, message: {response.message}"
                yield self.result_buf
                break
        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf


def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    conversation_cnt = len(history) // 2
    if system_prompt == '':
        system_prompt = 'Hello!'
    # The system prompt is injected as an opening user turn, acknowledged by a stub assistant reply
    messages = [{"role": "user", "content": system_prompt},
                {"role": "assistant", "content": "Certainly!"}]
    if conversation_cnt:
        for index in range(0, 2 * conversation_cnt, 2):
            what_i_have_asked = {"role": "user", "content": history[index]}
            what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
            if what_i_have_asked["content"] != "":
                # Drop turns whose answer is empty or is the local timeout notice
                if what_gpt_answer["content"] == "":
                    continue
                if what_gpt_answer["content"] == timeout_bot_msg:
                    continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # An empty question: overwrite the previous assistant message with this answer instead of appending a new turn
                messages[-1]['content'] = what_gpt_answer['content']
    messages.append({"role": "user", "content": inputs})
    return messages
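

# ----------------------------------------------------------------------
# A minimal usage sketch (illustrative only, not part of the original module).
# It assumes DASHSCOPE_API_KEY is set in the project's config so get_conf() can
# find it, and that llm_kwargs carries the keys this module actually reads:
# 'llm_model', 'top_p', and 'temperature'. The sample values below are hypothetical.
# ----------------------------------------------------------------------
if __name__ == '__main__':
    demo_llm_kwargs = {'llm_model': 'qwen-turbo', 'top_p': 0.8, 'temperature': 1.0}
    demo_history = ["Hello, who are you?", "I am Qwen, a large language model."]

    qwen = QwenRequestInstance()
    for partial in qwen.generate(
        inputs="Summarize our conversation so far.",
        llm_kwargs=demo_llm_kwargs,
        history=demo_history,
        system_prompt="You are a helpful assistant.",
    ):
        # Each yield is the full reply accumulated so far, suitable for a live UI refresh
        print(partial)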