Medical-Llama3-Chatbot

Runtime error

ruslanmv committed on May 15, 2024

Commit

0056b34

verified ·

1 Parent(s): a02afc4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,12 +29,12 @@ def askme(symptoms, question):
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
-    response_text = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0].strip() #skip_special_tokens=True
     # Remove system messages and content
     # Extract only the assistant's response
-    assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end|>', '')
    #  Extract only the assistant's response
-    return assistant_response

     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
+    response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip() #skip_special_tokens=True
     # Remove system messages and content
     # Extract only the assistant's response
+    #assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end|>', '')
    #  Extract only the assistant's response
+    return response_text