Joash2024 committed
Commit 9f03894 · 1 Parent(s): 8df9fb2

fix: improve GPU initialization and memory handling

Files changed (1)
  app.py +9 -2
app.py CHANGED
@@ -15,11 +15,18 @@ print("Loading base model...")
 model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     device_map="auto",
-    torch_dtype=torch.float16
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
 )
 
 print("Loading LoRA adapter...")
-model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
+model = PeftModel.from_pretrained(
+    model,
+    ADAPTER_MODEL,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
 model.eval()
 
 def format_prompt(function: str) -> str:
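
For context, a minimal self-contained sketch of how the model-loading section of app.py reads after this commit. The BASE_MODEL and ADAPTER_MODEL values, the tokenizer line, and the inline comments are assumptions not shown in this hunk; the real identifiers are defined elsewhere in app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "base-model-id"       # placeholder; actual value defined earlier in app.py
ADAPTER_MODEL = "lora-adapter-id"  # placeholder; actual value defined earlier in app.py

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",             # let accelerate place weights on available GPU/CPU devices
    torch_dtype=torch.float16,     # half precision to reduce GPU memory use
    low_cpu_mem_usage=True,        # stream weights instead of building a full fp32 copy in RAM
    use_safetensors=True,          # load safetensors checkpoints
)

print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(
    model,
    ADAPTER_MODEL,
    torch_dtype=torch.float16,     # keep adapter weights in the same dtype as the base model
    device_map="auto",
)
model.eval()                       # inference mode (disables dropout)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)  # assumption: tokenizer comes from the base repo

The two new from_pretrained arguments are what the commit message refers to: low_cpu_mem_usage avoids a transient full-size CPU copy during initialization, and passing torch_dtype/device_map to PeftModel.from_pretrained keeps the LoRA weights on the same devices and in the same precision as the base model.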