JohnSmith9982
committed on
Commit
·
890e483
1
Parent(s):
c5e5944
Create utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Helper functions for a Gradio chat UI backed by the OpenAI chat completions API.

Covers HTML rendering of chat messages, the request/streaming generator, and
saving or loading chat histories and CSV prompt templates."""
|
4 |
+
|
5 |
+
from __future__ import annotations
|
6 |
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type
|
7 |
+
import json
|
8 |
+
import gradio as gr
|
9 |
+
# import openai
|
10 |
+
import os
|
11 |
+
import traceback
|
12 |
+
import requests
|
13 |
+
# import markdown
|
14 |
+
import csv
|
15 |
+
import mdtex2html
|
16 |
+
|
17 |
+
if TYPE_CHECKING:
    # TYPE_CHECKING is False at runtime, so this branch is only seen by
    # static type checkers.
    from typing import TypedDict

    # Typed dict with a list of header strings and a list of data rows.
    class DataframeData(TypedDict):
        headers: List[str]
        data: List[List[str | int | bool]]
|
23 |
+
|
24 |
+
# Default value of predict()'s system_prompt parameter.
initial_prompt = "You are a helpful assistant."
# Endpoint that predict() POSTs its request payload to.
API_URL = "https://api.openai.com/v1/chat/completions"
# Directory used by save_chat_history(), load_chat_history() and get_history_names().
HISTORY_DIR = "history"
# Directory used by load_template() and get_template_names().
TEMPLATES_DIR = "templates"
|
28 |
+
|
29 |
+
def postprocess(
    self, y: List[Tuple[str | None, str | None]]
) -> List[Tuple[str | None, str | None]]:
    """
    Convert every message and response in ``y`` to HTML via mdtex2html.convert.

    Parameters:
        y: List of (message, response) tuples. Each member is a string, which may be in Markdown format, or None.
    Returns:
        An empty list when ``y`` is None; otherwise ``y`` itself, updated in place so that each non-None string is replaced by its HTML rendering (None members stay None).
    """
    if y is None:
        return []

    def render(text):
        # None marks an absent message/response and is passed through untouched.
        return mdtex2html.convert(text) if text is not None else None

    for index, pair in enumerate(y):
        user_text, bot_text = pair
        y[index] = (render(user_text), render(bot_text))
    return y
|
48 |
+
|
49 |
+
# Translation table applied to every line inside a fenced code block: the
# backtick is backslash-escaped and the other characters become HTML entities,
# so the code text is displayed literally. "&" is not escaped.
_CODE_ESCAPES = str.maketrans({
    "`": "\\`",
    "<": "&lt;",
    ">": "&gt;",
    " ": "&nbsp;",
    "*": "&ast;",
    "_": "&lowbar;",
    "-": "&#45;",
    ".": "&#46;",
    "!": "&#33;",
    "(": "&#40;",
    ")": "&#41;",
    "$": "&#36;",
})


def parse_text(text):
    """Turn chat text with ``` fenced code blocks into a single HTML string.

    Empty lines are dropped. A line containing ``` toggles code mode: an
    opening fence becomes ``<pre><code class="language-X">`` (X is whatever
    follows the last backtick on that line) and a closing fence becomes
    ``<br></code></pre>``. Every other line except the very first is prefixed
    with ``<br>``; lines inside a code block are additionally escaped.

    Parameters:
        text: The raw message string.
    Returns:
        The converted lines joined without separators.
    """
    lines = [line for line in text.split("\n") if line != ""]
    fence_count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            fence_count += 1
            if fence_count % 2 == 1:
                # Odd fence: opening a code block.
                language = line.split("`")[-1]
                lines[i] = f'<pre><code class="language-{language}">'
            else:
                lines[i] = "<br></code></pre>"
        elif i > 0:
            if fence_count % 2 == 1:
                # Inside a code block: escape in one pass so no replacement
                # can be re-escaped by a later one.
                line = line.translate(_CODE_ESCAPES)
            lines[i] = "<br>" + line
    return "".join(lines)
|
86 |
+
|
87 |
+
def predict(inputs, top_p, temperature, openai_api_key, chatbot=None, history=None, system_prompt=initial_prompt, retry=False, summary=False, retry_on_crash=False, stream=True):  # repetition_penalty, top_k
    """Send the conversation to the chat completions endpoint and yield UI updates.

    This is a generator. Every item it yields is a 3-tuple
    ``(chatbot, history, status_text)``.

    Parameters:
        inputs: The new user message.
        top_p: Forwarded as the ``top_p`` field of the request payload.
        temperature: Forwarded as the ``temperature`` field of the request payload.
        openai_api_key: Sent as the Bearer token in the Authorization header.
        chatbot: List of (user, reply) display pairs; mutated in place. A new
            list is created when omitted.
        history: Flat list alternating user and assistant messages; mutated in
            place (except in summary mode, where a new list is built). A new
            list is created when omitted.
        system_prompt: Content of the system message placed first in the request.
        retry: When true and history holds at least one pair, the last message
            rebuilt from history is dropped and ``inputs`` is not added to the
            request.
        summary: When true (and not retrying), a summarisation request is
            appended and ``history`` is replaced by the last two rebuilt
            messages plus a fixed question.
        retry_on_crash: Forces ``retry`` and limits the request to the last
            six rebuilt messages.
        stream: Whether to ask for, and consume, a streamed response.
    """
    # Fresh lists per call: a literal [] default would be shared across calls.
    chatbot = [] if chatbot is None else chatbot
    history = [] if history is None else history

    if retry_on_crash:
        retry = True

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }

    chat_counter = len(history) // 2
    print(f"chat_counter - {chat_counter}")

    # Rebuild the request messages from the (user, assistant) pairs in history.
    messages = []
    for index in range(0, 2 * chat_counter, 2):
        user_msg = {"role": "user", "content": history[index]}
        assistant_msg = {"role": "assistant", "content": history[index + 1]}
        if user_msg["content"] != "":
            # Pairs with an empty reply are only kept when retrying.
            if assistant_msg["content"] != "" or retry:
                messages.append(user_msg)
                messages.append(assistant_msg)
        elif messages:
            # Empty user turn: its reply overwrites the previous message.
            # Guarded so an empty first turn cannot raise IndexError.
            messages[-1]["content"] = assistant_msg["content"]

    if retry and chat_counter:
        if retry_on_crash:
            messages = messages[-6:]
        if messages:
            messages.pop()
    elif summary:
        history = [*[i["content"] for i in messages[-2:]], "我们刚刚聊了什么?"]
        messages.append(compose_user(
            "请帮我总结一下上述对话的内容,实现减少字数的同时,保证对话的质量。在总结中不要加入这一句话。"))
    else:
        messages.append(compose_user(inputs))

    messages = [compose_system(system_prompt), *messages]
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    if not summary:
        history.append(inputs)
    else:
        print("精简中...")

    print(f"payload: {payload}")
    # The HTTP response is always opened in streaming mode; the `stream`
    # argument decides how the body is consumed below.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    except Exception:
        traceback.print_exc()
        history.append("")
        chatbot.append((inputs, ""))
        # Same (chatbot, history, status) order as every other yield.
        yield chatbot, history, "出现了网络错误"
        return

    token_counter = 0
    partial_words = ""

    if stream:
        chatbot.append((parse_text(history[-1]), ""))
        for line_no, chunk in enumerate(response.iter_lines()):
            # The first line of the stream is always skipped.
            if line_no == 0:
                continue
            # Skip empty keep-alive lines.
            if not chunk:
                continue
            try:
                # Each line is bytes; the first 6 characters are cut off
                # before JSON decoding.
                chunkjson = json.loads(chunk.decode()[6:])
                choice = chunkjson["choices"][0]
                finished = len(choice["delta"]) == 0
                status_text = f"id: {chunkjson['id']}, finish_reason: {choice['finish_reason']}"
            except Exception:
                traceback.print_exc()
                if not retry_on_crash:
                    print("正在尝试使用缩短的context重新生成……")
                    chatbot.pop()
                    history.append("")
                    # One non-streaming retry with a shortened context.
                    yield next(predict(inputs, top_p, temperature, openai_api_key, chatbot, history, system_prompt, retry, summary=False, retry_on_crash=True, stream=False))
                else:
                    msg = "☹️发生了错误:生成失败,请检查网络"
                    print(msg)
                    # Keep history in (user, assistant) pairs: add an empty
                    # reply only if no partial reply has been stored yet.
                    if token_counter == 0:
                        history.append("")
                    # Show the error as the reply of the pending chat row.
                    chatbot[-1] = (chatbot[-1][0], msg)
                    yield chatbot, history, "status: ERROR"
                break
            if finished:
                # An empty delta marks the end of the reply.
                yield chatbot, history, status_text
                break
            partial_words = partial_words + choice["delta"]["content"]
            if token_counter == 0:
                history.append(" " + partial_words)
            else:
                history[-1] = partial_words
            chatbot[-1] = (parse_text(history[-2]), parse_text(history[-1]))
            token_counter += 1
            yield chatbot, history, status_text
    else:
        try:
            responsejson = json.loads(response.text)
            content = responsejson["choices"][0]["message"]["content"]
            history.append(content)
            chatbot.append((parse_text(history[-2]), parse_text(content)))
            status_text = "精简完成"
        except Exception:
            traceback.print_exc()
            chatbot.append((parse_text(history[-1]), "☹️发生了错误,请检查网络连接或者稍后再试。"))
            status_text = "status: ERROR"
        yield chatbot, history, status_text
|
215 |
+
|
216 |
+
|
217 |
+
|
218 |
+
def delete_last_conversation(chatbot, history):
    """Remove the most recent exchange from the chat display and the history.

    If the last displayed reply contains the error marker, only that chat row
    is removed and ``history`` is left untouched. Otherwise up to two trailing
    history entries and the last chat row are removed. Empty inputs are
    returned unchanged instead of raising.

    Parameters:
        chatbot: List of (user, reply) display pairs; mutated in place.
        history: Flat list of messages; mutated in place.
    Returns:
        The tuple ``(chatbot, history)``.
    """
    # The reply may be None, so normalise before the substring test.
    last_reply = chatbot[-1][1] if chatbot else None
    if last_reply and "☹️发生了错误" in last_reply:
        chatbot.pop()
        print(history)
        return chatbot, history
    # Slice deletion tolerates a history shorter than two entries.
    del history[-2:]
    # Drop the matching chat row so the display stays in step with history.
    if chatbot:
        chatbot.pop()
    print(history)
    return chatbot, history
|
227 |
+
|
228 |
+
def save_chat_history(filename, system, history, chatbot):
    """Write the system prompt, history and chatbot state to a JSON file in HISTORY_DIR.

    Does nothing when ``filename`` is the empty string. ``.json`` is appended
    to the name when it is missing, and HISTORY_DIR is created if needed.
    """
    if filename == "":
        return
    target_name = filename if filename.endswith(".json") else filename + ".json"
    os.makedirs(HISTORY_DIR, exist_ok=True)
    snapshot = dict(system=system, history=history, chatbot=chatbot)
    print(snapshot)
    target_path = os.path.join(HISTORY_DIR, target_name)
    with open(target_path, "w") as out_file:
        json.dump(snapshot, out_file)
|
238 |
+
|
239 |
+
|
240 |
+
def load_chat_history(filename):
    """Read a JSON chat record from HISTORY_DIR.

    Returns:
        The tuple ``(filename, system, history, chatbot)`` taken from the
        file's "system", "history" and "chatbot" keys.
    """
    record_path = os.path.join(HISTORY_DIR, filename)
    with open(record_path, "r") as saved:
        record = json.load(saved)
    print(record)
    system = record["system"]
    history = record["history"]
    chatbot = record["chatbot"]
    return filename, system, history, chatbot
|
245 |
+
|
246 |
+
|
247 |
+
def get_file_names(dir, plain=False, filetype=".json"):
    """List the files in ``dir`` whose names end with ``filetype``, sorted.

    A missing directory counts as empty. With ``plain`` true the list itself
    is returned; otherwise it is wrapped in ``gr.Dropdown.update(choices=...)``.
    """
    try:
        entries = os.listdir(dir)
    except FileNotFoundError:
        entries = []
    matching = sorted(name for name in entries if name.endswith(filetype))
    return matching if plain else gr.Dropdown.update(choices=matching)
|
257 |
+
|
258 |
+
def get_history_names(plain=False):
    """Return get_file_names() for HISTORY_DIR, using its default file type."""
    history_listing = get_file_names(HISTORY_DIR, plain)
    return history_listing
|
260 |
+
|
261 |
+
def load_template(filename, mode=0):
    """Load a two-column CSV prompt template from TEMPLATES_DIR.

    The first row is treated as a header and dropped. Rows with fewer than
    two columns (for example blank lines) are skipped rather than raising
    IndexError.

    Parameters:
        filename: Name of the CSV file inside TEMPLATES_DIR.
        mode: 1 returns the sorted first-column values; 2 returns a dict
            mapping first column to second column; any other value returns
            that dict together with ``gr.Dropdown.update(choices=<sorted names>)``.
    """
    template_path = os.path.join(TEMPLATES_DIR, filename)
    # newline="" lets the csv module handle line endings itself, which is
    # needed for quoted fields that contain embedded newlines.
    with open(template_path, "r", encoding="utf8", newline="") as csvfile:
        rows = list(csv.reader(csvfile))
    rows = [row for row in rows[1:] if len(row) >= 2]
    names = sorted(row[0] for row in rows)
    if mode == 1:
        return names
    prompts = {row[0]: row[1] for row in rows}
    if mode == 2:
        return prompts
    return prompts, gr.Dropdown.update(choices=names)
|
273 |
+
|
274 |
+
def get_template_names(plain=False):
    """Return get_file_names() for the ``.csv`` files in TEMPLATES_DIR."""
    template_listing = get_file_names(TEMPLATES_DIR, plain, filetype=".csv")
    return template_listing
|
276 |
+
|
277 |
+
def reset_state():
    """Return a tuple of two new, independent empty lists."""
    first_state = []
    second_state = []
    return first_state, second_state
|
279 |
+
|
280 |
+
|
281 |
+
def compose_system(system_prompt):
    """Build a message dict with role "system" and ``system_prompt`` as content."""
    message = dict(role="system", content=system_prompt)
    return message
|
283 |
+
|
284 |
+
|
285 |
+
def compose_user(user_input):
    """Build a message dict with role "user" and ``user_input`` as content."""
    message = dict(role="user", content=user_input)
    return message
|
287 |
+
|
288 |
+
|
289 |
+
def reset_textbox():
    """Return a ``gr.update`` that sets the target component's value to an empty string."""
    cleared_value = ''
    return gr.update(value=cleared_value)
|