drmasad committed on
Commit
216f23b
·
verified ·
1 Parent(s): 5a3a00b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -46,17 +46,22 @@ def load_model(selected_model_name):
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
- # Ensure the device is properly set for CUDA availability
50
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
51
 
52
- # Load the model with quantization settings
53
  model = AutoModelForCausalLM.from_pretrained(
54
  model_name,
55
  quantization_config=bnb_config,
56
  trust_remote_code=True,
57
  )
58
 
59
-
60
  # Additional configurations and training enhancements
61
  model.config.use_cache = False
62
  model = prepare_model_for_kbit_training(model)
@@ -82,6 +87,7 @@ def load_model(selected_model_name):
82
 
83
 
84
 
 
85
  # Load model and tokenizer
86
  model, tokenizer = load_model(selected_model)
87
 
 
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
+ # Define the BitsAndBytesConfig for quantization
50
+ bnb_config = BitsAndBytesConfig(
51
+ load_in_4bit=True,
52
+ bnb_4bit_quant_type="nf4",
53
+ bnb_4bit_compute_dtype=torch.bfloat16,
54
+ bnb_4bit_use_double_quant=False,
55
+ llm_int8_enable_fp32_cpu_offload=True,
56
+ )
57
 
58
+ # Load the model with quantization settings directly applied
59
  model = AutoModelForCausalLM.from_pretrained(
60
  model_name,
61
  quantization_config=bnb_config,
62
  trust_remote_code=True,
63
  )
64
 
 
65
  # Additional configurations and training enhancements
66
  model.config.use_cache = False
67
  model = prepare_model_for_kbit_training(model)
 
87
 
88
 
89
 
90
+
91
  # Load model and tokenizer
92
  model, tokenizer = load_model(selected_model)
93