ruslanmv committed
Commit b6d2b64 · verified · 1 Parent(s): ec0cce5

Update app.py

Files changed (1): app.py (+5 -5)
app.py CHANGED
@@ -29,18 +29,18 @@ def askme(symptoms, question):
 
     prompt = tokenizer.apply_chat_template(messages, template=custom_template, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
-    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
+    outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
     response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
 
     # Extract only the assistant's response
     start_idx = response_text.find("<|im_start|>assistant")
     end_idx = response_text.find("<|im_end|>", start_idx)
     assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]
-    # Return only one answer
-    #answers = assistant_response.split(". ")
-    #return answers[0] + "."
-    assistant_response
 
+    # Return only one answer
+    answers = assistant_response.split(". ")
+    return answers[0] + "."
+
 
 # Example usage
 symptoms = '''\
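
For context, here is a minimal standalone sketch of the post-processing this commit puts in place: slice the assistant turn out of the decoded ChatML-style text, then keep only its first sentence (the old code computed assistant_response but never returned it). The sample response_text below is invented for illustration.

# Minimal sketch of the post-processing added in this commit.
# The decoded output is assumed to carry ChatML-style markers
# (<|im_start|>assistant ... <|im_end|>); the sample text is made up.
response_text = (
    "<|im_start|>user\nI have a headache.<|im_end|>\n"
    "<|im_start|>assistant\nStay hydrated. Rest in a dark room.<|im_end|>"
)

# Slice out just the assistant's turn, as app.py does.
start_idx = response_text.find("<|im_start|>assistant")
end_idx = response_text.find("<|im_end|>", start_idx)
assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]

# Keep only the first sentence, the behavior this commit enables.
answers = assistant_response.split(". ")
print(answers[0] + ".")  # prints "Stay hydrated." (with a leading newline)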