Fixing token repetition — what will Windsurf do?
Browse files- app.py +8 -5
- models.lst +1 -1
app.py
CHANGED
@@ -164,11 +164,14 @@ with gr.Blocks() as demo:
|
|
164 |
|
165 |
def submit_message(message, chat_history, model_name, system_message, max_tokens, temperature, top_p):
|
166 |
history = [] if chat_history is None else chat_history
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
172 |
yield history, ""
|
173 |
|
174 |
submit_event = submit.click(
|
|
|
164 |
|
165 |
def submit_message(message, chat_history, model_name, system_message, max_tokens, temperature, top_p):
|
166 |
history = [] if chat_history is None else chat_history
|
167 |
+
|
168 |
+
# Add user message first
|
169 |
+
history = history + [{"role": "user", "content": message}]
|
170 |
+
|
171 |
+
# Then stream the assistant's response
|
172 |
+
for response in respond(message, history[:-1], model_name, system_message, max_tokens, temperature, top_p):
|
173 |
+
history[-1] = {"role": "user", "content": message}
|
174 |
+
history = history + [{"role": "assistant", "content": response}]
|
175 |
yield history, ""
|
176 |
|
177 |
submit_event = submit.click(
|
models.lst
CHANGED
@@ -12,6 +12,6 @@ https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF
|
|
12 |
|
13 |
https://huggingface.co/lmstudio-community/granite-3.0-1b-a400m-instruct-GGUF
|
14 |
|
15 |
-
https://huggingface.co/lmstudio-community/AMD-OLMo-1B-SFT-GGUF
|
16 |
|
17 |
|
|
|
12 |
|
13 |
https://huggingface.co/lmstudio-community/granite-3.0-1b-a400m-instruct-GGUF
|
14 |
|
15 |
+
https://huggingface.co/lmstudio-community/AMD-OLMo-1B-SFT-DPO-GGUF
|
16 |
|
17 |
|