expandme committed on
Commit 54081a3
1 Parent(s): 7537b38

Fixing token repetition? What will wind.surf do?

Files changed (2)
  1. app.py +8 -5
  2. models.lst +1 -1
app.py CHANGED
@@ -164,11 +164,14 @@ with gr.Blocks() as demo:
 
     def submit_message(message, chat_history, model_name, system_message, max_tokens, temperature, top_p):
         history = [] if chat_history is None else chat_history
-        for response in respond(message, history, model_name, system_message, max_tokens, temperature, top_p):
-            history = history + [
-                {"role": "user", "content": message},
-                {"role": "assistant", "content": response}
-            ]
+
+        # Add user message first
+        history = history + [{"role": "user", "content": message}]
+
+        # Then stream the assistant's response
+        for response in respond(message, history[:-1], model_name, system_message, max_tokens, temperature, top_p):
+            history[-1] = {"role": "user", "content": message}
+            history = history + [{"role": "assistant", "content": response}]
             yield history, ""
 
     submit_event = submit.click(
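
Note on the new loop: each streamed chunk still appends a fresh assistant entry and rewrites the previous one back into the user message, so the history can keep growing while a single reply streams. A minimal alternative sketch, assuming respond() yields the cumulative assistant text with the same signature used above (this is not the committed code): append the user turn and an empty assistant turn once, then overwrite the assistant entry in place on each chunk.

# Sketch only (not the committed code): stream by updating the last entry
# in place, assuming respond() yields the cumulative assistant text.
def submit_message(message, chat_history, model_name, system_message, max_tokens, temperature, top_p):
    history = [] if chat_history is None else chat_history
    # Append the user turn once, plus an empty assistant turn to fill in.
    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": ""},
    ]
    for response in respond(message, history[:-2], model_name, system_message, max_tokens, temperature, top_p):
        # Overwrite the assistant entry instead of appending a new one per chunk.
        history[-1] = {"role": "assistant", "content": response}
        yield history, ""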
models.lst CHANGED
@@ -12,6 +12,6 @@ https://huggingface.co/lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF
 
 https://huggingface.co/lmstudio-community/granite-3.0-1b-a400m-instruct-GGUF
 
-https://huggingface.co/lmstudio-community/AMD-OLMo-1B-SFT-GGUF
+https://huggingface.co/lmstudio-community/AMD-OLMo-1B-SFT-DPO-GGUF
 
 