ruslanmv committed
Commit acb60b8
1 Parent(s): e802041

Update app.py

Files changed (1)
  1. app.py +0 -3
app.py CHANGED
@@ -13,8 +13,6 @@ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
 print(f"Using device: {device}")
 #print(f"Using dtype: {dtype}")
 print(f"low memory: {LOW_MEMORY}")
-
-device = "cuda"
 model_name = "ruslanmv/Medical-Llama3-8B"
 # Move model and tokenizer to the CUDA device
 model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
@@ -33,7 +31,6 @@ def askme(symptoms, question):
     inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
     response_text = tokenizer.batch_decode(outputs)[0].strip()
-    answer = response_text.split('<|im_start|>assistant')[-1].strip()
     return answer
 
 # Example usage
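
Removing the hardcoded device = "cuda" override means the script now relies on whatever device detection runs earlier in app.py, above this hunk. A minimal sketch of that setup, assuming a standard torch.cuda.is_available() check; only the lines visible in the diff are taken from the source, the rest is an assumption:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"

# Assumed detection step: pick the GPU when present instead of forcing
# "cuda", so the app can still start on CPU-only hardware.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")
print(f"low memory: {LOW_MEMORY}")

model_name = "ruslanmv/Medical-Llama3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)  # assumed; not shown in the diff
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)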
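
The second hunk removes the post-processing line that split the decoded output on the <|im_start|>assistant marker, while return answer still references that name. For reference, a sketch of the generation path around this hunk; the prompt construction is an assumption (the diff does not show how prompt is built), and the sketch returns the decoded text directly rather than the removed answer variable:

def askme(symptoms, question):
    # Assumed prompt construction; not visible in the diff.
    prompt = f"Symptoms: {symptoms}\nQuestion: {question}"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # keep tensors on the model's device
    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
    response_text = tokenizer.batch_decode(outputs)[0].strip()
    # Before this commit, the assistant reply was isolated like this:
    # answer = response_text.split('<|im_start|>assistant')[-1].strip()
    return response_text

# Example usage with hypothetical inputs:
print(askme("persistent cough and mild fever", "What could be the cause?"))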
 