Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -13,8 +13,6 @@ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
 print(f"Using device: {device}")
 #print(f"Using dtype: {dtype}")
 print(f"low memory: {LOW_MEMORY}")
-
-device = "cuda"
 model_name = "ruslanmv/Medical-Llama3-8B"
 # Move model and tokenizer to the CUDA device
 model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
@@ -33,7 +31,6 @@ def askme(symptoms, question):
     inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
     response_text = tokenizer.batch_decode(outputs)[0].strip()
-    answer = response_text.split('<|im_start|>assistant')[-1].strip()
     return answer

 # Example usage
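The first hunk deletes a stray blank line and a hard-coded `device = "cuda"` override; the `print(f"Using device: {device}")` context line implies `device` is already defined earlier in app.py. A minimal sketch of what that earlier definition typically looks like (an assumption, since the actual line sits outside this diff):

import torch

# Assumed setup above the diffed region: prefer CUDA when present, fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"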
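The second hunk is riskier: it removes the line that defines `answer` but keeps `return answer`, so calling `askme` will raise a NameError, which plausibly accounts for the Space's "Runtime error" status. A sketch of the function with the assignment restored (the prompt template here is hypothetical, since it falls outside the hunk; the `<|im_start|>assistant` split marker comes from the removed line):

def askme(symptoms, question):
    # Hypothetical ChatML-style prompt; the real template is not shown in this diff.
    prompt = (f"<|im_start|>user\nSymptoms: {symptoms}\nQuestion: {question}<|im_end|>\n"
              "<|im_start|>assistant\n")
    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # keep inputs on the model's device
    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
    response_text = tokenizer.batch_decode(outputs)[0].strip()
    # Reinstate the removed assignment so `answer` exists before it is returned.
    answer = response_text.split('<|im_start|>assistant')[-1].strip()
    return answer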