Corvius committed
Commit 4464e12 · verified · 1 Parent(s): bcda566

better streaming

Files changed (1): app.py (+11 -9)
app.py CHANGED

@@ -3,6 +3,7 @@ import requests
 import json
 import threading
 import os
+import datetime
 from requests.exceptions import RequestException
 
 stop_generation = threading.Event()
@@ -16,6 +17,9 @@ headers = {
 
 session = requests.Session()
 
+def get_timestamp():
+    return datetime.datetime.now().strftime("%H:%M:%S")
+
 def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
     global stop_generation, session
     stop_generation.clear()
@@ -27,6 +31,8 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
             history_format.append({"role": "assistant", "content": assistant})
     history_format.append({"role": "user", "content": message})
 
+    print(f"<|system|> {system_prompt}")
+
     data = {
         "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
         "messages": history_format,
@@ -58,15 +64,12 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
                         content = json_data['choices'][0]['delta'].get('content', '')
                         if content:
                             partial_message += content
-
-                            print(f"<|assistant|>\n{partial_message}\n")
                             yield partial_message
                     except json.JSONDecodeError:
                         continue
 
         if partial_message:
-
-            print(f"<|assistant|>\n{partial_message}\n")
+            print(f"<|assistant|> {partial_message}")
             yield partial_message
 
     except RequestException as e:
@@ -99,11 +102,11 @@ def import_chat(custom_format_string):
     return None, None
 
 def export_chat(history, system_prompt):
-    export_data = f"<|system|>\n{system_prompt}\n\n"
+    export_data = f"<|system|> {system_prompt}\n\n"
     for user_msg, assistant_msg in history:
-        export_data += f"<|user|>\n{user_msg}\n\n"
+        export_data += f"<|user|> {user_msg}\n\n"
         if assistant_msg:
-            export_data += f"<|assistant|>\n{assistant_msg}\n\n"
+            export_data += f"<|assistant|> {assistant_msg}\n\n"
     return export_data
 
 def stop_generation_func():
@@ -139,8 +142,7 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
         max_tokens = gr.Slider(1, 1024, value=256, step=1, label="Max Output (max_tokens)")
 
     def user(user_message, history):
-
-        print(f"<|user|>\n{user_message}\n")
+        print(f"{get_timestamp()} <|user|> {user_message}")
        return "", history + [[user_message, None]]
 
    def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
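The @@ -58,15 hunk shows only the innermost parsing lines; the request call and the line iteration around them sit outside the diff context. Below is a minimal sketch of how an OpenAI-compatible SSE stream like this one is typically consumed with requests. Only session, headers, data, partial_message, stop_generation, and the two print statements are taken from the diff; API_URL, the "data: " prefix handling, and the exact nesting are assumptions, not code from this commit.

partial_message = ""
# stream=True makes requests yield the response incrementally instead of buffering it
response = session.post(API_URL, headers=headers, json=data, stream=True)
for raw_line in response.iter_lines():
    if stop_generation.is_set():        # flipped by stop_generation_func() via the UI
        break
    if not raw_line:
        continue
    line = raw_line.decode("utf-8")
    if not line.startswith("data: "):   # SSE events arrive as "data: {...}" lines
        continue
    payload = line[len("data: "):]
    if payload.strip() == "[DONE]":     # end-of-stream sentinel used by OpenAI-style APIs
        break
    try:
        json_data = json.loads(payload)
        content = json_data['choices'][0]['delta'].get('content', '')
        if content:
            partial_message += content
            yield partial_message       # push the growing reply to the Gradio chatbox
    except json.JSONDecodeError:
        continue

if partial_message:
    print(f"<|assistant|> {partial_message}")   # the new single-line console log
    yield partial_message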
 
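The tag-format change also affects exported chats. As a quick illustration, this is what export_chat returns after this commit, with a made-up one-turn history (import_chat is untouched here, so round-tripping assumes its parser also accepts the single-space form):

history = [["Hello there", "Hi! How can I help?"]]
print(export_chat(history, "You are a helpful assistant."))

# Output:
# <|system|> You are a helpful assistant.
#
# <|user|> Hello there
#
# <|assistant|> Hi! How can I help?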