ukumar557 commited on
Commit
e3867c9
·
verified ·
1 Parent(s): 9d00c70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -10,7 +10,11 @@ if device.type == "cpu":
10
  # Load the tokenizer and model directly
11
  model_name = "ruslanmv/ai-medical-model-32bit"
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
- model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
 
 
 
 
14
 
15
 
16
 
@@ -21,11 +25,11 @@ def ask_medical_question(question):
21
  inputs = tokenizer(prompt, return_tensors="pt").to(device)
22
  outputs = model.generate(
23
  **inputs,
24
- max_new_tokens=256,
25
  temperature=0.7,
26
  do_sample=True,
27
- top_p=0.95,
28
- top_k=50,
29
  )
30
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
31
  return response
 
10
  # Load the tokenizer and model directly
11
  model_name = "ruslanmv/ai-medical-model-32bit"
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
+ model = AutoModelForCausalLM.from_pretrained(
14
+ model_name,
15
+ device_map="auto",
16
+ load_in_8bit=True
17
+ )  # no .to(device): device_map="auto" already places the 8-bit model; .to() raises on quantized models
18
 
19
 
20
 
 
25
  inputs = tokenizer(prompt, return_tensors="pt").to(device)
26
  outputs = model.generate(
27
  **inputs,
28
+ max_new_tokens=100,
29
  temperature=0.7,
30
  do_sample=True,
31
+ top_p=0.9,
32
+ top_k=30,
33
  )
34
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
35
  return response