Joash2024 committed
Commit 9f03894 · 1 Parent(s): 8df9fb2

fix: improve GPU initialization and memory handling

Files changed (1)
  app.py +9 -2
app.py CHANGED
@@ -15,11 +15,18 @@ print("Loading base model...")
 model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     device_map="auto",
-    torch_dtype=torch.float16
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
 )
 
 print("Loading LoRA adapter...")
-model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
+model = PeftModel.from_pretrained(
+    model,
+    ADAPTER_MODEL,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
 model.eval()
 
 def format_prompt(function: str) -> str:
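
For context, a minimal self-contained sketch of how the model-loading section of app.py reads after this commit. The BASE_MODEL and ADAPTER_MODEL values, the tokenizer line, and the inline comments are assumptions not shown in this hunk; the real identifiers are defined elsewhere in app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "base-model-id"       # placeholder; actual value defined earlier in app.py
ADAPTER_MODEL = "lora-adapter-id"  # placeholder; actual value defined earlier in app.py

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",             # let accelerate place weights on available GPU/CPU devices
    torch_dtype=torch.float16,     # half precision to reduce GPU memory use
    low_cpu_mem_usage=True,        # stream weights instead of building a full fp32 copy in RAM
    use_safetensors=True,          # load safetensors checkpoints
)

print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(
    model,
    ADAPTER_MODEL,
    torch_dtype=torch.float16,     # keep adapter weights in the same dtype as the base model
    device_map="auto",
)
model.eval()                       # inference mode (disables dropout)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)  # assumption: tokenizer comes from the base repo

The two new from_pretrained arguments are what the commit message refers to: low_cpu_mem_usage avoids a transient full-size CPU copy during initialization, and passing torch_dtype/device_map to PeftModel.from_pretrained keeps the LoRA weights on the same devices and in the same precision as the base model.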