Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -29,12 +29,12 @@ def askme(symptoms, question):
|
|
29 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
30 |
inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
|
31 |
outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
|
32 |
-
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
|
33 |
# Remove system messages and content
|
34 |
# Extract only the assistant's response
|
35 |
-
assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end|>', '')
|
36 |
# Extract only the assistant's response
|
37 |
-
return assistant_response
|
38 |
|
39 |
|
40 |
|
|
|
29 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
30 |
inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
|
31 |
outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
|
32 |
+
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip() #skip_special_tokens=True
|
33 |
# Remove system messages and content
|
34 |
# Extract only the assistant's response
|
35 |
+
#assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end|>', '')
|
36 |
# Extract only the assistant's response
|
37 |
+
return response_text
|
38 |
|
39 |
|
40 |
|