Medical-Llama3-Chatbot

Runtime error

ruslanmv committed on May 15, 2024

Commit

b6d2b64

verified ·

1 Parent(s): ec0cce5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,18 +29,18 @@ def askme(symptoms, question):
     prompt = tokenizer.apply_chat_template(messages, template=custom_template, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Ensure inputs are on CUDA device
-    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
     response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
     # Extract only the assistant's response
     start_idx = response_text.find("<|im_start|>assistant")
     end_idx = response_text.find("<|im_end|>", start_idx)
     assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]
-    # Return only one answer
-    #answers = assistant_response.split(". ")
-    #return answers[0] + "."
-    assistant_response
 # Example usage
 symptoms = '''\

     prompt = tokenizer.apply_chat_template(messages, template=custom_template, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Ensure inputs are on CUDA device
+    outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
     response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
     # Extract only the assistant's response
     start_idx = response_text.find("<|im_start|>assistant")
     end_idx = response_text.find("<|im_end|>", start_idx)
     assistant_response = response_text[start_idx + len("<|im_start|>assistant"):end_idx]
+    # Return only one answer
+    answers = assistant_response.split(". ")
+    return answers[0] + "."
 # Example usage
 symptoms = '''\