Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -46,29 +46,23 @@ def load_model(selected_model_name):
     st.info("Loading the model, please wait...")
     model_name = model_links[selected_model_name]
 
-    #
-
-
-    # Check the availability of CUDA
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    # Manually move the model to the device
-    model = model.to(device)
-
-    # Apply quantization configuration if required
-    if device == 'cuda':  # Only apply BitsAndBytesConfig if on CUDA
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16,
-            bnb_4bit_use_double_quant=False,
-            llm_int8_enable_fp32_cpu_offload=False,
-        )
-        # Assume quantization applies here, adjust as per actual use case
-        # model = apply_quantization(model, bnb_config)
+    # Ensure the device is properly set for CUDA availability
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    # Load the model with quantization settings
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        trust_remote_code=True,
+    )
+
+    # Ensure every part of the model is assigned to the correct device
+    model.to(device)  # This should correctly set devices for all components
+
+    # Additional configurations and training enhancements
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
 
+    # If using PEFT or other enhancements, configure here
     peft_config = LoraConfig(
         lora_alpha=16,
         lora_dropout=0.1,
@@ -77,7 +71,6 @@
         task_type="CAUSAL_LM",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
     )
-
     model = get_peft_model(model, peft_config)
 
     tokenizer = AutoTokenizer.from_pretrained(
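Note that the new code keeps the "quantization settings" comment but no longer passes a BitsAndBytesConfig to from_pretrained, so the model now loads in full precision and is moved to the device afterwards. If the 4-bit loading from the removed block is still wanted on a GPU Space, the usual transformers pattern is to hand the config to from_pretrained directly and let device placement happen at load time. A minimal sketch, not the committed code, assuming a CUDA runtime with bitsandbytes installed and reusing the nf4 settings shown above:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,               # same 4-bit settings as the removed block
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,                      # resolved from model_links, as in app.py
    trust_remote_code=True,
    quantization_config=bnb_config,  # quantization applied at load time, not after
    device_map="auto",               # lets transformers assign layers to the GPU
)
# A model loaded with quantization_config/device_map should not be moved again
# with model.to(device); the loader already places every component.

This path requires a CUDA runtime and the bitsandbytes package; on a CPU Space, the unquantized from_pretrained call in the commit is the safer default.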