Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -29,18 +29,18 @@ def askme(symptoms, question):
|
|
29 |
|
30 |
prompt = tokenizer.apply_chat_template(messages, template=custom_template, tokenize=False, add_generation_prompt=True)
|
31 |
inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
|
32 |
-
outputs = model.generate(**inputs, max_new_tokens=… [remainder of this removed line was lost in extraction]
|
33 |
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
|
34 |
|
35 |
# Extract only the assistant's response
|
36 |
start_idx = response_text.find("<|im_start|>assistant")
|
37 |
end_idx = response_text.find("<|im_end|>", start_idx)
|
38 |
assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]
|
39 |
-
# Return only one answer
|
40 |
-
#answers = assistant_response.split(". ")
|
41 |
-
#return answers[0] + "."
|
42 |
-
assistant_response
|
43 |
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Example usage
|
46 |
symptoms = '''\
|
|
|
29 |
|
30 |
prompt = tokenizer.apply_chat_template(messages, template=custom_template, tokenize=False, add_generation_prompt=True)
|
31 |
inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
|
32 |
+
outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
|
33 |
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
|
34 |
|
35 |
# Extract only the assistant's response
|
36 |
start_idx = response_text.find("<|im_start|>assistant")
|
37 |
end_idx = response_text.find("<|im_end|>", start_idx)
|
38 |
assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]
|
|
|
|
|
|
|
|
|
39 |
|
40 |
+
# Return only one answer
|
41 |
+
answers = assistant_response.split(". ")
|
42 |
+
return answers[0] + "."
|
43 |
+
|
44 |
|
45 |
# Example usage
|
46 |
symptoms = '''\
|