jatingocodeo committed
Commit cbb5f6b · verified · 1 Parent(s): 393b8b9

Update app.py

Files changed (1)
  1. app.py +7 -17
app.py CHANGED
@@ -7,9 +7,6 @@ import torchvision.datasets as datasets
 import os
 
 def load_model(model_id):
-    # Create offload directory
-    os.makedirs("offload", exist_ok=True)
-
     # First load the base model
     base_model_id = "microsoft/Phi-3-mini-4k-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
@@ -18,27 +15,20 @@ def load_model(model_id):
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
 
-    # Load base model with 8-bit quantization and offloading
+    # Load base model for CPU
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_id,
-        load_in_8bit=True,  # Use 8-bit quantization
-        torch_dtype=torch.float16,
-        device_map={
-            "model.embed_tokens": 0,
-            "model.layers": "auto",
-            "model.norm": "cpu",
-            "lm_head": 0
-        },
-        offload_folder="offload",
-        trust_remote_code=True
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        device_map="cpu",           # Force CPU
+        trust_remote_code=True,
+        low_cpu_mem_usage=True      # Enable memory optimization
     )
 
-    # Load the LoRA adapter with same device mapping
+    # Load the LoRA adapter
     model = PeftModel.from_pretrained(
         base_model,
         model_id,
-        offload_folder="offload",
-        device_map="auto"
+        device_map="cpu"  # Force CPU
     )
 
     return model, tokenizer
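
For reference, a minimal usage sketch of the updated loader (not part of the commit): it assumes load_model is the function defined above, that torch is already imported in app.py, and that the adapter repo id passed in is a placeholder for the actual fine-tuned model id.

# Hypothetical usage sketch: load the CPU-only model and run one generation.
# "your-username/phi3-lora-adapter" is a placeholder, not the real repo id.
import torch

model, tokenizer = load_model("your-username/phi3-lora-adapter")
model.eval()

prompt = "Explain LoRA fine-tuning in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")  # tensors stay on CPU

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,
        pad_token_id=tokenizer.pad_token_id,
    )

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))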