Tobias Bergmann committed
Commit 4a44fb0 · 1 Parent(s): 91a07e0
Files changed (1)
  1. app.py +13 -20
app.py CHANGED
@@ -29,7 +29,7 @@ pipe = Llama(
 
 def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS, progress=gr.Progress()):
     if not message:
-        return "", history
+        return "", history, ""
 
     prompt = message
     history.append([message, ""])
@@ -40,9 +40,6 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAUL
     # Initialize token count and start time
     token_count = 0
     start_time = time.time()
-
-    last_token_count = 0
-    last_time = start_time
 
     # This will produce a generator of output chunks
     stream = pipe(
@@ -58,24 +55,20 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAUL
         reply += new_text
         token_count += len(new_text.split())  # Estimate tokens by counting spaces
         history[-1][1] = reply  # Update the current reply in history
-
-        # Calculate elapsed time since last update
-        elapsed_time = time.time() - last_time
-
+
+        # Calculate elapsed time and TPS
+        elapsed_time = time.time() - start_time
         if elapsed_time > 0:
-            # Calculate tokens per second since last update
-            tokens_per_second = (token_count - last_token_count) / elapsed_time
+            tps = token_count / elapsed_time
         else:
-            tokens_per_second = 0
-
+            tps = 0
+
         # Update the status using gradio's progress
-        progress(message=f"Tokens per second: {tokens_per_second:.2f}")
-
-        # Update for next iteration
-        last_token_count = token_count
-        last_time = time.time()
+        status_message = f"Tokens per second: {tps:.2f}"
+
+        yield "", history, status_message
 
-        yield "", history
+
 
 
 with gr.Blocks() as demo:
@@ -89,7 +82,7 @@ with gr.Blocks() as demo:
         value=DEFAULT_MAX_NEW_TOKENS,
         label="Max New Tokens",
     )
-
-    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot], )
+    status_field = gr.Text(label="Status", interactive=False, visible=True)  # Add Status field
+    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot, status_field])
 
 demo.queue().launch()
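
For reference, a minimal self-contained sketch of what app.py looks like after this commit. It assumes llama-cpp-python's standard streaming chunk format; the model path, the DEFAULT_MAX_NEW_TOKENS value, the slider range, and the prompt handling are placeholders for code outside this diff, and the now-unused progress parameter is dropped. The empty-message branch yields instead of returning, since a plain `return value` inside a generator is swallowed and Gradio would never see it.

import time
from typing import List

import gradio as gr
from llama_cpp import Llama

DEFAULT_MAX_NEW_TOKENS = 256           # assumption: real value is outside this diff
pipe = Llama(model_path="model.gguf")  # assumption: hypothetical model path


def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    if not message:
        # Yield the empty update rather than `return "", history, ""`,
        # which a generator would swallow.
        yield "", history, ""
        return

    history.append([message, ""])
    token_count = 0
    start_time = time.time()

    # stream=True turns the llama-cpp-python call into a generator of chunks
    stream = pipe(message, max_tokens=max_new_tokens, stream=True)

    reply = ""
    for chunk in stream:
        new_text = chunk["choices"][0]["text"]
        reply += new_text
        token_count += len(new_text.split())  # rough whitespace-based estimate
        history[-1][1] = reply

        # Cumulative tokens per second since generation started
        elapsed_time = time.time() - start_time
        tps = token_count / elapsed_time if elapsed_time > 0 else 0

        # Each yielded 3-tuple streams into (textbox, chatbot, status_field)
        yield "", history, f"Tokens per second: {tps:.2f}"


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()
    max_new_tokens_slider = gr.Slider(1, 1024, value=DEFAULT_MAX_NEW_TOKENS, label="Max New Tokens")
    status_field = gr.Text(label="Status", interactive=False)
    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot, status_field])

demo.queue().launch()

Because the handler yields 3-tuples, each chunk updates the textbox, chatbot, and status field in order, so the tokens-per-second readout refreshes live while the reply streams. Note the commit also changes what is measured: it now reports a cumulative average over the whole generation, rather than the per-update rate the removed last_token_count/last_time bookkeeping computed.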