Tobias Bergmann committed
Commit · 785de3c · 1 Parent(s): 668ee0d
streaming per token
app.py
CHANGED
@@ -29,10 +29,10 @@ pipe = Llama(
 def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
     if not message:
         return "", history
-
+
     prompt = message
     history.append([message, ""])
-
+
     # Initialize reply for this round
     reply = ""
 
@@ -44,12 +44,13 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
         stream=True
     )
 
+    # Send each token stream output to the user
     for output in stream:
         new_text = output['choices'][0]['text']
-        reply += new_text
-        history[-1][1] = reply
-        yield "", history
-
+        reply += new_text
+        history[-1][1] = reply  # Update the current reply in history
+        yield "", history
+
 
 with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
@@ -64,4 +65,4 @@ with gr.Blocks() as demo:
     )
     textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
 
-demo.queue().launch()
+demo.queue().launch()
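For context, a minimal self-contained sketch of the per-token streaming pattern this commit lands on, assuming llama-cpp-python and Gradio. The parts of app.py not shown in the diff are filled with placeholders: DESCRIPTION, DEFAULT_MAX_NEW_TOKENS, MODEL_PATH, and the slider bounds are assumptions, and the empty-message branch yields once before returning (the diff's code returns bare), so the generator still emits an update.

# Sketch of the streaming chat app; MODEL_PATH and the UI defaults are assumptions.
from typing import List

import gradio as gr
from llama_cpp import Llama

DESCRIPTION = "Chat demo that streams the model's reply token by token."
DEFAULT_MAX_NEW_TOKENS = 256   # placeholder default
MODEL_PATH = "model.gguf"      # placeholder: path to a local GGUF model

pipe = Llama(model_path=MODEL_PATH)

def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    if not message:
        yield "", history  # emit one update even when there is nothing to generate
        return

    prompt = message
    history.append([message, ""])

    # Initialize reply for this round
    reply = ""

    stream = pipe(
        prompt,
        max_tokens=max_new_tokens,
        stream=True,
    )

    # Send each token stream output to the user
    for output in stream:
        new_text = output['choices'][0]['text']
        reply += new_text
        history[-1][1] = reply  # Update the current reply in history
        yield "", history       # each yield repaints the Chatbot with the longer reply

with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()
    max_new_tokens_slider = gr.Slider(
        minimum=1, maximum=1024, value=DEFAULT_MAX_NEW_TOKENS, step=1,
        label="Max new tokens",
    )
    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])

demo.queue().launch()

The point of the commit is that yield "", history sits inside the for loop, so the queued Gradio app pushes an update to the client after every token instead of once at the end; demo.queue() is what lets Gradio stream successive values from the generator.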