Spaces:

drmasad
/

HAH-2024-v0.1

Runtime error

drmasad committed on Apr 27, 2024

Commit

ac74534

verified ·

1 Parent(s): 7b94d99

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,14 +42,16 @@ st.sidebar.image("https://www.hmgaihub.com/untitled.png")
 st.sidebar.markdown("*Generated content may be inaccurate or false.*")
 st.sidebar.markdown("*This is an under development project.*")
-# Function to load model
 def load_model(selected_model_name):
     st.info("Loading the model, please wait...")
     model_name = model_links[selected_model_name]
-    # Set a specific device
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -58,18 +60,18 @@ def load_model(selected_model_name):
         llm_int8_enable_fp32_cpu_offload=True,
     )
-    device_map = {
-        'encoder.layer.0': 'cuda',  # Keep specific parts on GPU
-        'decoder': 'cpu',           # Offload others to CPU
-    }
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
-        device_map=device_map,
         trust_remote_code=True,
     )
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
@@ -88,6 +90,7 @@ def load_model(selected_model_name):
     tokenizer = AutoTokenizer.from_pretrained(
         "mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
     )
     st.success("Model is ready. Now we are ready!")
     return model, tokenizer

 st.sidebar.markdown("*Generated content may be inaccurate or false.*")
 st.sidebar.markdown("*This is an under development project.*")
 def load_model(selected_model_name):
     st.info("Loading the model, please wait...")
     model_name = model_links[selected_model_name]
+    # Set default device for all tensor operations
+    torch.set_default_tensor_type('torch.FloatTensor')
+    if torch.cuda.is_available():
+        torch.set_default_tensor_type('torch.cuda.FloatTensor')
+    # Define configuration for loading the model
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         llm_int8_enable_fp32_cpu_offload=True,
     )
+    # Load the model
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
         trust_remote_code=True,
     )
+    # Explicitly move the model to the correct device
+    if torch.cuda.is_available():
+        model.cuda()  # Move model to GPU
+    else:
+        model.cpu()  # Move model to CPU
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
     tokenizer = AutoTokenizer.from_pretrained(
         "mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
     )
     st.success("Model is ready. Now we are ready!")
     return model, tokenizer