drmasad committed on
Commit
397c64d
·
verified ·
1 Parent(s): ca3be5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -48,7 +48,6 @@ def load_model(selected_model_name):
48
  # Set a specific device
49
  device = "cuda" if torch.cuda.is_available() else "cpu"
50
 
51
- # Load model with device mapping
52
  bnb_config = BitsAndBytesConfig(
53
  load_in_4bit=True,
54
  bnb_4bit_quant_type="nf4",
@@ -57,16 +56,19 @@ def load_model(selected_model_name):
57
  llm_int8_enable_fp32_cpu_offload=True,
58
  )
59
 
60
- device_map = {"": device} # Default device for all components
61
-
62
- # Load model with proper device mapping
 
 
63
  model = AutoModelForCausalLM.from_pretrained(
64
  model_name,
65
  quantization_config=bnb_config,
66
- device_map=device_map, # Assign device
67
  trust_remote_code=True,
68
  )
69
 
 
70
  model.config.use_cache = False
71
  model = prepare_model_for_kbit_training(model)
72
 
 
48
  # Set a specific device
49
  device = "cuda" if torch.cuda.is_available() else "cpu"
50
 
 
51
  bnb_config = BitsAndBytesConfig(
52
  load_in_4bit=True,
53
  bnb_4bit_quant_type="nf4",
 
56
  llm_int8_enable_fp32_cpu_offload=True,
57
  )
58
 
59
+ device_map = {
60
+ 'encoder.layer.0': 'cuda', # Keep specific parts on GPU
61
+ 'decoder': 'cpu', # Offload others to CPU
62
+ }
63
+
64
  model = AutoModelForCausalLM.from_pretrained(
65
  model_name,
66
  quantization_config=bnb_config,
67
+ device_map=device_map,
68
  trust_remote_code=True,
69
  )
70
 
71
+
72
  model.config.use_cache = False
73
  model = prepare_model_for_kbit_training(model)
74