Work around the empty-message bug.
custom_llm.py  CHANGED  +8 -4
@@ -209,15 +209,19 @@ def logprobs(request: ContinueMessagesRequest):
     messages = [{"role": m.role, "content": m.content} for m in request.messages]
     if len(messages) == 0:
         raise HTTPException(status_code=400, detail="At least one message must be provided.")
-    n_branch_tokens = request.n_branch_tokens
-    n_future_tokens = request.n_future_tokens

     model = ml_models['llm']['model']
     tokenizer = ml_models['llm']['tokenizer']

-
-
+    # Work around a bug when the last message is empty
+    trim_last_message = False
+    if messages[-1]['content'] == '':
+        messages[-1]['content'] = '.'
+        trim_last_message = True
     tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", continue_final_message=True).to(model.device)
+    if trim_last_message:
+        tokenized_chat = tokenized_chat[:, :-1]
+

     # Compute all logits
     with torch.no_grad():
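For reference, the workaround can be exercised outside the endpoint. The following is a minimal standalone sketch, not the Space's actual code: the model name is only an example, and it assumes the tokenizer's chat template supports continue_final_message and that the '.' placeholder encodes to a single token.

from transformers import AutoTokenizer

# Example model for illustration only; the Space may load a different one.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": ""},  # empty final message triggers the bug
]

# Substitute a one-token placeholder so the chat template has something to
# continue, then drop that placeholder token after tokenization.
trim_last_message = False
if messages[-1]["content"] == "":
    messages[-1]["content"] = "."
    trim_last_message = True

tokenized_chat = tokenizer.apply_chat_template(
    messages, tokenize=True, return_tensors="pt", continue_final_message=True
)
if trim_last_message:
    tokenized_chat = tokenized_chat[:, :-1]

print(tokenized_chat.shape)

Trimming exactly one token only undoes the placeholder if '.' is a single token in the model's vocabulary; a more defensive variant could trim len(tokenizer.encode('.', add_special_tokens=False)) tokens instead.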