ruslanmv committed
Commit acb60b8
1 Parent(s): e802041

Update app.py

Files changed (1)
  1. app.py +0 -3
app.py CHANGED
@@ -13,8 +13,6 @@ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
 print(f"Using device: {device}")
 #print(f"Using dtype: {dtype}")
 print(f"low memory: {LOW_MEMORY}")
-
-device = "cuda"
 model_name = "ruslanmv/Medical-Llama3-8B"
 # Move model and tokenizer to the CUDA device
 model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
@@ -33,7 +31,6 @@ def askme(symptoms, question):
     inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
     response_text = tokenizer.batch_decode(outputs)[0].strip()
-    answer = response_text.split('<|im_start|>assistant')[-1].strip()
     return answer
 
 # Example usage
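
Removing the hardcoded device = "cuda" override means the script now relies on whatever device detection runs earlier in app.py, above this hunk. A minimal sketch of that setup, assuming a standard torch.cuda.is_available() check; only the lines visible in the diff are taken from the source, the rest is an assumption:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"

# Assumed detection step: pick the GPU when present instead of forcing
# "cuda", so the app can still start on CPU-only hardware.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")
print(f"low memory: {LOW_MEMORY}")

model_name = "ruslanmv/Medical-Llama3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)  # assumed; not shown in the diff
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)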
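
The second hunk removes the post-processing line that split the decoded output on the <|im_start|>assistant marker, while return answer still references that name. For reference, a sketch of the generation path around this hunk; the prompt construction is an assumption (the diff does not show how prompt is built), and the sketch returns the decoded text directly rather than the removed answer variable:

def askme(symptoms, question):
    # Assumed prompt construction; not visible in the diff.
    prompt = f"Symptoms: {symptoms}\nQuestion: {question}"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # keep tensors on the model's device
    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
    response_text = tokenizer.batch_decode(outputs)[0].strip()
    # Before this commit, the assistant reply was isolated like this:
    # answer = response_text.split('<|im_start|>assistant')[-1].strip()
    return response_text

# Example usage with hypothetical inputs:
print(askme("persistent cough and mild fever", "What could be the cause?"))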
 