Spaces:
Sleeping
Sleeping
Tobias Bergmann
committed on
Commit
·
4a44fb0
1
Parent(s):
91a07e0
tps field
Browse files
app.py
CHANGED
@@ -29,7 +29,7 @@ pipe = Llama(
|
|
29 |
|
30 |
def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS, progress=gr.Progress()):
|
31 |
if not message:
|
32 |
-
return "", history
|
33 |
|
34 |
prompt = message
|
35 |
history.append([message, ""])
|
@@ -40,9 +40,6 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAUL
|
|
40 |
# Initialize token count and start time
|
41 |
token_count = 0
|
42 |
start_time = time.time()
|
43 |
-
|
44 |
-
last_token_count = 0
|
45 |
-
last_time = start_time
|
46 |
|
47 |
# This will produce a generator of output chunks
|
48 |
stream = pipe(
|
@@ -58,24 +55,20 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAUL
|
|
58 |
reply += new_text
|
59 |
token_count += len(new_text.split()) # Estimate tokens by counting spaces
|
60 |
history[-1][1] = reply # Update the current reply in history
|
61 |
-
|
62 |
-
# Calculate elapsed time
|
63 |
-
elapsed_time = time.time() - last_time
|
64 |
-
|
65 |
if elapsed_time > 0:
|
66 |
-
|
67 |
-
tokens_per_second = (token_count - last_token_count) / elapsed_time
|
68 |
else:
|
69 |
-
|
70 |
-
|
71 |
# Update the status using gradio's progress
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
last_token_count = token_count
|
76 |
-
last_time = time.time()
|
77 |
|
78 |
-
|
79 |
|
80 |
|
81 |
with gr.Blocks() as demo:
|
@@ -89,7 +82,7 @@ with gr.Blocks() as demo:
|
|
89 |
value=DEFAULT_MAX_NEW_TOKENS,
|
90 |
label="Max New Tokens",
|
91 |
)
|
92 |
-
|
93 |
-
textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot])
|
94 |
|
95 |
demo.queue().launch()
|
|
|
29 |
|
30 |
def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS, progress=gr.Progress()):
|
31 |
if not message:
|
32 |
+
return "", history, ""
|
33 |
|
34 |
prompt = message
|
35 |
history.append([message, ""])
|
|
|
40 |
# Initialize token count and start time
|
41 |
token_count = 0
|
42 |
start_time = time.time()
|
|
|
|
|
|
|
43 |
|
44 |
# This will produce a generator of output chunks
|
45 |
stream = pipe(
|
|
|
55 |
reply += new_text
|
56 |
token_count += len(new_text.split()) # Estimate tokens by counting spaces
|
57 |
history[-1][1] = reply # Update the current reply in history
|
58 |
+
|
59 |
+
# Calculate elapsed time and TPS
|
60 |
+
elapsed_time = time.time() - start_time
|
|
|
61 |
if elapsed_time > 0:
|
62 |
+
tps = token_count / elapsed_time
|
|
|
63 |
else:
|
64 |
+
tps = 0
|
65 |
+
|
66 |
# Update the status using gradio's progress
|
67 |
+
status_message = f"Tokens per second: {tps:.2f}"
|
68 |
+
|
69 |
+
yield "", history, status_message
|
|
|
|
|
70 |
|
71 |
+
|
72 |
|
73 |
|
74 |
with gr.Blocks() as demo:
|
|
|
82 |
value=DEFAULT_MAX_NEW_TOKENS,
|
83 |
label="Max New Tokens",
|
84 |
)
|
85 |
+
status_field = gr.Text(label="Status", interactive=False, visible=True) # Add Status field
|
86 |
+
textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot, status_field])
|
87 |
|
88 |
demo.queue().launch()
|