Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -46,12 +46,7 @@ def load_model(selected_model_name):
     st.info("Loading the model, please wait...")
     model_name = model_links[selected_model_name]
 
-    #
-    torch.set_default_tensor_type('torch.FloatTensor')
-    if torch.cuda.is_available():
-        torch.set_default_tensor_type('torch.cuda.FloatTensor')
-
-    # Define configuration for loading the model
+    # Configure the quantization and device settings
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -60,19 +55,21 @@ def load_model(selected_model_name):
         llm_int8_enable_fp32_cpu_offload=True,
     )
 
-    #
+    # Device map to specify where each component should reside
+    device_map = {
+        'encoder': 'cuda',  # or 'cpu' if reducing GPU load is crucial
+        'decoder': 'cpu',
+        'embed_tokens': 'cpu'
+    }
+
+    # Load the model with the specified device map and quantization config
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
+        device_map=device_map,
         trust_remote_code=True,
     )
 
-    # Explicitly move the model to the correct device
-    if torch.cuda.is_available():
-        model.cuda()  # Move model to GPU
-    else:
-        model.cpu()  # Move model to CPU
-
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
 
@@ -95,6 +92,7 @@ def load_model(selected_model_name):
 
     return model, tokenizer
 
+
 # Load model and tokenizer
 model, tokenizer = load_model(selected_model)
 
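For reference, a minimal self-contained sketch of the loading path this commit converges on, assuming transformers, bitsandbytes, peft, and streamlit are installed. The model_links entry, the selected_model value, and the tokenizer call are placeholders (they live elsewhere in app.py and are not visible in this diff), and device_map="auto" is shown in place of the commit's hand-written 'encoder'/'decoder'/'embed_tokens' mapping, since "auto" does not require knowing the model's exact submodule names.

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

# Hypothetical stand-ins for objects defined elsewhere in app.py.
model_links = {"Example-7B": "mistralai/Mistral-7B-v0.1"}  # placeholder entry
selected_model = "Example-7B"

def load_model(selected_model_name):
    st.info("Loading the model, please wait...")
    model_name = model_links[selected_model_name]

    # 4-bit NF4 quantization with fp32 CPU offload, as in the commit.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # "auto" lets accelerate place submodules on the GPU and spill the rest
    # to CPU; the commit instead pins named components to devices by hand.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )

    # Disable the KV cache and prepare the quantized weights for k-bit
    # (e.g. LoRA) training, as in the unchanged part of the function.
    model.config.use_cache = False
    model = prepare_model_for_kbit_training(model)

    # Tokenizer loading is not visible in this diff; assumed to be the usual call.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_model(selected_model)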