YangWu001 committed
Commit c97ee35 · 1 Parent(s): b0b56fc
Files changed (1):
  1. app.py +10 -7

app.py CHANGED
@@ -6,7 +6,7 @@ from transformers import pipeline
 
 # Inference client setup
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-#pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
+# pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
 
 # Global flag to handle cancellation
 stop_inference = False
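For context, this hunk only reformats the comment toggling between a hosted Zephyr endpoint and a (still commented-out) local Phi-3 pipeline. A minimal sketch of the two inference paths, assuming the standard huggingface_hub and transformers APIs; the prompt formatting and the `use_local_model` flag live elsewhere in app.py and are not shown in this diff:

import torch
from huggingface_hub import InferenceClient
from transformers import pipeline

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def remote_stream(prompt, max_tokens=512, temperature=0.7, top_p=0.95):
    # Remote path: stream tokens from the hosted Inference API endpoint.
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        yield token

def load_local_pipe():
    # Local path (commented out in app.py): Phi-3 mini via transformers.
    return pipeline(
        "text-generation",
        "microsoft/Phi-3-mini-4k-instruct",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )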
@@ -48,7 +48,7 @@ def respond(
             yield response  # Yielding response directly
 
         # Ensure the history is updated after generating the response
-        history.append((message, response))
+        history[-1] = (message, response)  # Update the last tuple in history with the full response
         yield history  # Yield the updated history
 
     else:
@@ -77,7 +77,7 @@ def respond(
             yield response  # Yielding response directly
 
         # Ensure the history is updated after generating the response
-        history.append((message, response))
+        history[-1] = (message, response)  # Update the last tuple in history with the full response
         yield history  # Yield the updated history
 
 def cancel_inference():
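Both hunks above apply the same fix to the two branches of respond(): `history.append((message, response))` tacked a second (message, response) tuple onto the history after streaming finished, while `history[-1] = (message, response)` overwrites the turn already at the end of the list. A minimal sketch of the pattern, assuming a placeholder turn is seeded before streaming starts (that seeding happens elsewhere in app.py):

def respond_sketch(message, history):
    # Assumed: a placeholder (message, "") turn is appended before streaming.
    history.append((message, ""))
    response = ""
    for token in ["Hel", "lo", "!"]:  # stands in for the real token stream
        response += token
        yield response                # partial text, as in the diff above
    # In-place update: append() here would leave both the placeholder
    # and a duplicate (message, response) tuple in the history.
    history[-1] = (message, response)
    yield history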
@@ -141,7 +141,7 @@ with gr.Blocks(css=custom_css) as demo:
 
     with gr.Row():
         max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
 
     chat_history = gr.Chatbot(label="Chat")
@@ -161,10 +161,13 @@ with gr.Blocks(css=custom_css) as demo:
             top_p.value,
             use_local_model.value,
         )
+        full_response = ""
         for response in response_gen:
-            # Replace the last history tuple with the complete message-response pair
-            history[-1] = (message, response)
-            yield history
+            full_response += response  # Accumulate the full response
+
+            # Replace the last history tuple with the complete message-response pair
+            history[-1] = (message, full_response)
+            yield history
 
     user_input.submit(chat_fn, [user_input, chat_history], chat_history)
     cancel_button.click(cancel_inference)
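The new `full_response` accumulator treats each value yielded by respond() as an incremental chunk; note that if respond() actually yields cumulative text (as its own `yield response` lines suggest), `+=` would repeat earlier content, so the two sides need to agree on delta versus cumulative streaming. The event wiring itself is the standard Gradio generator pattern: each `yield history` re-renders the Chatbot. A self-contained sketch of that pattern, with all names besides the Gradio API being illustrative:

import gradio as gr

def chat_fn(message, history):
    history = history + [(message, "")]  # seed a placeholder turn
    reply = ""
    for chunk in ["Str", "eam", "ing"]:  # stands in for delta chunks from the model
        reply += chunk
        history[-1] = (message, reply)   # update the placeholder in place
        yield history                    # each yield re-renders the Chatbot

with gr.Blocks() as demo:
    chat_history = gr.Chatbot(label="Chat")
    user_input = gr.Textbox(label="Message")
    user_input.submit(chat_fn, [user_input, chat_history], chat_history)

demo.launch()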