Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -48,7 +48,6 @@ def load_model(selected_model_name):
     # Set a specific device
     device = "cuda" if torch.cuda.is_available() else "cpu"

-    # Load model with device mapping
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -57,16 +56,19 @@
         llm_int8_enable_fp32_cpu_offload=True,
     )

-    device_map = {
-
-
+    device_map = {
+        'encoder.layer.0': 'cuda', # Keep specific parts on GPU
+        'decoder': 'cpu', # Offload others to CPU
+    }
+
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
-        device_map=device_map,
+        device_map=device_map,
         trust_remote_code=True,
     )

+
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)

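For reference, a minimal, self-contained sketch of load_model as it stands after this commit. The imports, the resolution of selected_model_name to model_name, and whatever follows the shown lines sit outside the diff, so those parts below are assumptions; the device_map keys ('encoder.layer.0', 'decoder') are copied verbatim from the commit and have to match the submodule names of the actual checkpoint.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training


def load_model(selected_model_name):
    # Assumption: the real app resolves the selected name to a checkpoint id
    # somewhere above the diffed region; a pass-through stands in for it here.
    model_name = selected_model_name

    # Set a specific device (kept as in the source; used further down in the app)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # 4-bit NF4 quantization with fp32 CPU offload enabled, as in the diff
    # (any other BitsAndBytesConfig arguments fall outside the shown hunks).
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # Per-module placement: keep selected submodules on the GPU and offload
    # the rest to CPU. The keys must match names reported by
    # model.named_modules() for the chosen checkpoint.
    device_map = {
        'encoder.layer.0': 'cuda',  # Keep specific parts on GPU
        'decoder': 'cpu',           # Offload others to CPU
    }

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
        trust_remote_code=True,
    )

    model.config.use_cache = False
    model = prepare_model_for_kbit_training(model)
    # Assumption: the real function likely continues (tokenizer setup, etc.);
    # returning the model keeps this sketch self-contained.
    return model

Note that when a dict is passed as device_map, accelerate expects every parameter of the model to be covered by one of the keys (an "" entry can serve as a catch-all), so keys that do not correspond to real submodules of the selected checkpoint will raise an error inside from_pretrained, which is one plausible source of this Space's runtime-error status.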