Tobias Bergmann committed on
Commit 91a07e0 · 1 Parent(s): 518754f
Files changed (1)
  1. app.py +17 -8
app.py CHANGED
@@ -40,6 +40,9 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS
     # Initialize token count and start time
     token_count = 0
     start_time = time.time()
+
+    last_token_count = 0
+    last_time = start_time
 
     # This will produce a generator of output chunks
     stream = pipe(
@@ -55,16 +58,22 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS
         reply += new_text
         token_count += len(new_text.split()) # Estimate tokens by counting spaces
         history[-1][1] = reply # Update the current reply in history
-
-        # Calculate elapsed time and TPS
-        elapsed_time = time.time() - start_time
+
+        # Calculate elapsed time since last update
+        elapsed_time = time.time() - last_time
+
         if elapsed_time > 0:
-            tps = token_count / elapsed_time
+            # Calculate tokens per second since last update
+            tokens_per_second = (token_count - last_token_count) / elapsed_time
         else:
-            tps = 0
-
+            tokens_per_second = 0
+
         # Update the status using gradio's progress
-        progress(message=f"Tokens per second: {tps:.2f}")
+        progress(message=f"Tokens per second: {tokens_per_second:.2f}")
+
+        # Update for next iteration
+        last_token_count = token_count
+        last_time = time.time()
 
         yield "", history
 
@@ -80,7 +89,7 @@ with gr.Blocks() as demo:
             value=DEFAULT_MAX_NEW_TOKENS,
             label="Max New Tokens",
         )
-    status_field = gr.Text(label="Status", interactive=False, visible=True) # Add Status field
+
     textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot], )
 
 demo.queue().launch()
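
The substance of the change: the old code divided the cumulative token_count by the time elapsed since start_time, so the display showed a running average over the whole generation, while the new code measures the rate over the interval since the previous chunk. A minimal sketch of that pattern outside Gradio, with fake_stream() as a hypothetical stand-in for the pipe(...) generator:

import time

def fake_stream():
    # Hypothetical stand-in for the pipeline's chunk generator.
    for chunk in ["Hello", " there,", " streaming", " one", " chunk", " at", " a", " time."]:
        time.sleep(0.05)
        yield chunk

token_count = 0
start_time = time.time()
last_token_count = 0
last_time = start_time

for new_text in fake_stream():
    token_count += len(new_text.split()) # Estimate tokens by counting spaces
    elapsed_time = time.time() - last_time
    if elapsed_time > 0:
        # Rate over the interval since the previous chunk, not since start_time
        tokens_per_second = (token_count - last_token_count) / elapsed_time
    else:
        tokens_per_second = 0
    print(f"Tokens per second: {tokens_per_second:.2f}")
    last_token_count = token_count
    last_time = time.time()

Either way, len(new_text.split()) counts whitespace-separated words rather than tokenizer tokens, so the reported figure is an approximation; the per-interval version simply tracks current throughput instead of smoothing it over the entire reply.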