Spaces:
Runtime error
better streaming
app.py
CHANGED
@@ -3,6 +3,7 @@ import requests
 import json
 import threading
 import os
+import datetime
 from requests.exceptions import RequestException
 
 stop_generation = threading.Event()
@@ -16,6 +17,9 @@ headers = {
 
 session = requests.Session()
 
+def get_timestamp():
+return datetime.datetime.now().strftime("%H:%M:%S")
+
 def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
 global stop_generation, session
 stop_generation.clear()
@@ -27,6 +31,8 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
 history_format.append({"role": "assistant", "content": assistant})
 history_format.append({"role": "user", "content": message})
 
+print(f"<|system|> {system_prompt}")
+
 data = {
 "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
 "messages": history_format,
@@ -58,15 +64,12 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
 content = json_data['choices'][0]['delta'].get('content', '')
 if content:
 partial_message += content
-
-print(f"<|assistant|>\n{partial_message}\n")
 yield partial_message
 except json.JSONDecodeError:
 continue
 
 if partial_message:
-
-print(f"<|assistant|>\n{partial_message}\n")
+print(f"<|assistant|> {partial_message}")
 yield partial_message
 
 except RequestException as e:
@@ -99,11 +102,11 @@ def import_chat(custom_format_string):
 return None, None
 
 def export_chat(history, system_prompt):
-export_data = f"<|system
+export_data = f"<|system|> {system_prompt}\n\n"
 for user_msg, assistant_msg in history:
-export_data += f"<|user
+export_data += f"<|user|> {user_msg}\n\n"
 if assistant_msg:
-export_data += f"<|assistant
+export_data += f"<|assistant|> {assistant_msg}\n\n"
 return export_data
 
 def stop_generation_func():
@@ -139,8 +142,7 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
 max_tokens = gr.Slider(1, 1024, value=256, step=1, label="Max Output (max_tokens)")
 
 def user(user_message, history):
-
-print(f"<|user|>\n{user_message}\n")
+print(f"{get_timestamp()} <|user|> {user_message}")
 return "", history + [[user_message, None]]
 
 def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
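For context on the "better streaming" change: the hunk at @@ -58,15 +64,12 @@ drops the per-chunk print inside predict's event-stream loop and keeps a single log line after the stream ends, so stdout is no longer rewritten on every token. The surrounding loop is only partially visible in the diff, so the sketch below is an assumed reconstruction of that loop for an OpenAI-style "data:" SSE stream; the standalone function name stream_chat, the url parameter, and the [DONE] sentinel check are illustrative assumptions, not code from this Space.

import json

def stream_chat(session, url, headers, data):
    # Accumulate streamed delta chunks and yield the growing message for the UI.
    partial_message = ""
    with session.post(url, headers=headers, json=data, stream=True) as response:
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if not line.startswith("data: "):
                continue
            payload = line[len("data: "):]
            if payload.strip() == "[DONE]":
                break
            try:
                json_data = json.loads(payload)
                content = json_data['choices'][0]['delta'].get('content', '')
                if content:
                    partial_message += content
                    yield partial_message  # stream to the chat UI; no print per chunk
            except json.JSONDecodeError:
                continue
    if partial_message:
        print(f"<|assistant|> {partial_message}")  # log the full reply once, after streaming
        yield partial_message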
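Pulling the +105 through +109 lines together, the updated export_chat serializes the conversation with the same <|role|> tags the console logging uses. A reassembled version (indentation inferred from the diff context) with a small illustrative call follows; the sample strings are made up.

def export_chat(history, system_prompt):
    # One tag per turn, entries separated by blank lines.
    export_data = f"<|system|> {system_prompt}\n\n"
    for user_msg, assistant_msg in history:
        export_data += f"<|user|> {user_msg}\n\n"
        if assistant_msg:
            export_data += f"<|assistant|> {assistant_msg}\n\n"
    return export_data

# Illustrative call (values are made up):
print(export_chat([["Hi there", "Hello! How can I help?"]], "You are a helpful assistant."))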
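The user callback (old line 141 / new line 144) now timestamps its console log via get_timestamp. The diff does not show how user and bot are attached to the UI, so the snippet below is only an assumed sketch of the usual gr.Blocks event chain; msg and chatbot are hypothetical component names standing in for whatever the Space actually defines.

# Assumed event wiring inside the `with gr.Blocks(...) as demo:` block.
msg.submit(
    user, [msg, chatbot], [msg, chatbot], queue=False
).then(
    bot,
    [chatbot, system_prompt, temperature, top_p, top_k,
     frequency_penalty, presence_penalty, repetition_penalty, max_tokens],
    chatbot,
)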