Spaces:
Running
Running
File size: 6,206 Bytes
bfa6661 3af0bbd bfa6661 2420d62 bfa6661 190b547 bfa6661 190b547 bfa6661 eee4cb3 bfa6661 dcaa7a1 bfa6661 3af0bbd bfa6661 3af0bbd bfa6661 190b547 bfa6661 3af0bbd bfa6661 2420d62 3af0bbd 190b547 3af0bbd bfa6661 2420d62 bfa6661 190b547 bfa6661 3af0bbd bfa6661 3af0bbd eee4cb3 3af0bbd 190b547 2420d62 3af0bbd 190b547 3af0bbd 190b547 3af0bbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
'''
Contributed by SagsMug. Modified by binary-husky
https://github.com/oobabooga/text-generation-webui/pull/175
'''
import asyncio
import json
import random
import string
import websockets
import logging
import time
import threading
import importlib
from toolbox import get_conf
LLM_MODEL, = get_conf('LLM_MODEL')
# get_conf returns a one-element sequence here; the trailing comma unpacks it.
# Expected format is "<model name>@<host>:<port>",
# e.g. "TGUI:galactica-1.3b@localhost:7860".
# Unpacking into two names raises ValueError unless the string contains
# exactly one '@'.
model_name, addr_port = LLM_MODEL.split('@')
# The assertion message reads "LLM_MODEL format is incorrect!" followed by
# the offending value.
assert ':' in addr_port, "LLM_MODEL 格式不正确!" + LLM_MODEL
# addr and port are later interpolated into the websocket URL used by run().
addr, port = addr_port.split(':')
def random_hash():
    """Return a random 9-character string of lowercase ASCII letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = []
    # One random.choice call per character, nine characters in total.
    for _ in range(9):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
async def run(context, max_token=512, temperature=0.5, top_p=0.9):
    """
    Stream a completion for `context` from the web UI's queue websocket.

    This is an async generator. It connects to
    ``ws://{addr}:{port}/queue/join`` (``addr`` and ``port`` are module-level
    values parsed from LLM_MODEL), answers the server's handshake messages,
    submits the prompt together with the sampling parameters, and yields
    ``content["output"]["data"][0]`` for every "process_generating" and
    "process_completed" message. Callers in this file treat each yielded
    value as the text generated so far.

    Args:
        context: prompt text sent to the server.
        max_token: value sent as ``max_new_tokens``.
        temperature: sampling temperature (default 0.5, the value that was
            previously hard-coded).
        top_p: nucleus-sampling threshold (default 0.9, the value that was
            previously hard-coded).

    Yields:
        The first element of the server's output data for each generation
        update; iteration stops after the "process_completed" message.
    """
    params = {
        'max_new_tokens': max_token,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': 1,
        'repetition_penalty': 1.05,
        'encoder_repetition_penalty': 1.0,
        'top_k': 0,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': True,
        'seed': -1,
    }
    session = random_hash()

    async with websockets.connect(f"ws://{addr}:{port}/queue/join") as websocket:
        # The walrus loop also terminates if the server sends a falsy JSON
        # payload (e.g. null or an empty object).
        while content := json.loads(await websocket.recv()):
            msg = content["msg"]
            if msg == "send_hash":
                # NOTE(review): fn_index 12 presumably selects the generation
                # function of the targeted web UI build - confirm against the
                # server version in use.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12
                }))
            elif msg == "estimation":
                # Queue-position estimate; nothing to do.
                pass
            elif msg == "send_data":
                # The server expects the arguments positionally, in exactly
                # this order.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12,
                    "data": [
                        context,
                        params['max_new_tokens'],
                        params['do_sample'],
                        params['temperature'],
                        params['top_p'],
                        params['typical_p'],
                        params['repetition_penalty'],
                        params['encoder_repetition_penalty'],
                        params['top_k'],
                        params['min_length'],
                        params['no_repeat_ngram_size'],
                        params['num_beams'],
                        params['penalty_alpha'],
                        params['length_penalty'],
                        params['early_stopping'],
                        params['seed'],
                    ]
                }))
            elif msg == "process_starts":
                pass
            elif msg in ["process_generating", "process_completed"]:
                yield content["output"]["data"][0]
                # A consumer can look for its own end marker in the yielded
                # text and stop iterating, which closes the websocket.
                if msg == "process_completed":
                    break
def predict_tgui(inputs, top_p, temperature, chatbot=None, history=None, system_prompt='', stream=True, additional_fn=None):
    """
    Send `inputs` to the text-generation web UI and stream the reply.

    Used for the basic chat feature. This is a generator: it yields
    ``(chatbot, history, status_text)`` once right after the request is
    queued and then again whenever new text has arrived.

    Args:
        inputs: the user's input for this query.
        top_p, temperature: sampling parameters supplied by the caller.
            They are accepted for interface compatibility but are not
            forwarded to the backend by this function.
        chatbot: conversation list shown in the web UI; it is modified in
            place and yielded so the UI can refresh. A new list is created
            when omitted.
        history: flat list of previous dialogue turns; the new input and
            its reply are appended in place. A new list is created when
            omitted.
        system_prompt, stream: unused by this function.
        additional_fn: key of the clicked function button, looked up in
            core_functional.get_functions(); None for a plain chat message.
    """
    # Fresh lists per call: mutable defaults would be shared between calls
    # and accumulate every conversation, because both are mutated below.
    if chatbot is None:
        chatbot = []
    if history is None:
        history = []

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)  # hot-reload the prompt definitions
        functions = core_functional.get_functions()
        entry = functions[additional_fn]
        # Apply the optional pre-processing hook, if the entry defines one.
        if "PreProcess" in entry:
            inputs = entry["PreProcess"](inputs)
        inputs = entry["Prefix"] + inputs + entry["Suffix"]

    raw_input = "What I would like to say is the following: " + inputs
    logging.info(f'[raw_input] {raw_input}')
    history.extend([inputs, ""])
    chatbot.append([inputs, ""])
    yield chatbot, history, "等待响应"

    prompt = inputs
    tgui_say = ""

    # Shared state with the worker thread:
    #   mutable[0] - latest text received from the backend
    #   mutable[1] - heartbeat timestamp refreshed by this generator
    mutable = ["", time.time()]

    def run_coroutine(mutable):
        async def get_result(mutable):
            async for response in run(prompt):
                print(response[len(mutable[0]):])
                mutable[0] = response
                # Stop generating when the consumer has not polled for
                # more than 3 seconds.
                if (time.time() - mutable[1]) > 3:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coroutine, args=(mutable,), daemon=True)
    thread_listen.start()

    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()
        # Publish intermediate output only when it has changed.
        if tgui_say != mutable[0]:
            tgui_say = mutable[0]
            history[-1] = tgui_say
            chatbot[-1] = (history[-2], history[-1])
            yield chatbot, history, "status_text"

    # Final flush: the worker may have stored its last chunk after the last
    # poll above but before the liveness check ended the loop.
    if tgui_say != mutable[0]:
        tgui_say = mutable[0]
        history[-1] = tgui_say
        chatbot[-1] = (history[-2], history[-1])
        yield chatbot, history, "status_text"

    logging.info(f'[response] {tgui_say}')
def predict_tgui_no_ui(inputs, top_p, temperature, history=None, sys_prompt=""):
    """
    Send `inputs` to the text-generation web UI and return the final reply.

    Blocking, non-streaming counterpart of predict_tgui: the request runs in
    a worker thread with max_token=20 and this function polls once per
    second until the worker has finished.

    Args:
        inputs: prompt text sent to the backend.
        top_p, temperature, history, sys_prompt: accepted for interface
            compatibility; unused by this function.

    Returns:
        The last text received from the backend ("" if nothing arrived).
    """
    # Shared state with the worker thread:
    #   mutable[0] - latest text received from the backend
    #   mutable[1] - heartbeat timestamp refreshed by the polling loop
    mutable = ["", time.time()]

    def run_coroutine(mutable):
        async def get_result(mutable):
            async for response in run(inputs, max_token=20):
                print(response[len(mutable[0]):])
                mutable[0] = response
                # Stop generating when the heartbeat is older than 3 seconds.
                if (time.time() - mutable[1]) > 3:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coroutine, args=(mutable,))
    thread_listen.start()

    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()  # keep the worker's watchdog satisfied

    # Read the result only after the worker has exited, so the last chunk it
    # stored is never missed.
    return mutable[0]
|