app.py CHANGED
@@ -6,7 +6,7 @@ from transformers import pipeline
 
 # Inference client setup
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-#pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
+# pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
 
 # Global flag to handle cancellation
 stop_inference = False
@@ -48,7 +48,7 @@ def respond(
         yield response  # Yielding response directly
 
         # Ensure the history is updated after generating the response
-        history
+        history[-1] = (message, response)  # Update the last tuple in history with the full response
         yield history  # Yield the updated history
 
     else:
@@ -77,7 +77,7 @@ def respond(
         yield response  # Yielding response directly
 
         # Ensure the history is updated after generating the response
-        history
+        history[-1] = (message, response)  # Update the last tuple in history with the full response
         yield history  # Yield the updated history
 
 def cancel_inference():
@@ -141,7 +141,7 @@ with gr.Blocks(css=custom_css) as demo:
 
     with gr.Row():
         max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-        temperature = gr.Slider(minimum=0.1, maximum
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
 
     chat_history = gr.Chatbot(label="Chat")
@@ -161,10 +161,13 @@ with gr.Blocks(css=custom_css) as demo:
             top_p.value,
             use_local_model.value,
         )
+        full_response = ""
         for response in response_gen:
-
-
-
+            full_response += response  # Accumulate the full response
+
+            # Replace the last history tuple with the complete message-response pair
+            history[-1] = (message, full_response)
+            yield history
 
     user_input.submit(chat_fn, [user_input, chat_history], chat_history)
    cancel_button.click(cancel_inference)
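
The core of this change is the streaming pattern in the last hunk: the chat handler accumulates the chunks yielded by the model generator into full_response and rewrites the last (message, reply) tuple of the Chatbot history on every step, so the answer grows in place as it streams. Below is a minimal, self-contained sketch of that pattern, not the Space's exact code: fake_stream is a stand-in for the real InferenceClient call, and chat_fn's signature and the widget names are assumptions for illustration.

import time
import gradio as gr

def fake_stream(message):
    # Hypothetical stand-in for the streaming model client; yields partial text chunks.
    for word in ("Echo:", *message.split()):
        time.sleep(0.05)
        yield word + " "

def chat_fn(message, history):
    history = (history or []) + [(message, "")]   # reserve a slot for the reply
    full_response = ""
    for chunk in fake_stream(message):
        full_response += chunk                    # accumulate the partial response
        history[-1] = (message, full_response)    # rewrite the last pair in place
        yield history                             # Gradio re-renders the Chatbot on each yield

with gr.Blocks() as demo:
    chat_history = gr.Chatbot(label="Chat")
    user_input = gr.Textbox(label="Message")
    # Same wiring as in the diff: a generator handler streams updates into the Chatbot.
    user_input.submit(chat_fn, [user_input, chat_history], chat_history)

if __name__ == "__main__":
    demo.launch()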