Spaces:

drmasad
/

HAH-2024-v0.1

Runtime error

App Files Files

drmasad committed on Apr 27, 2024

Commit

e659be2

verified ·

1 Parent(s): 8666754

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -9

app.py CHANGED Viewed

@@ -37,30 +37,52 @@ st.sidebar.button("Reset Chat", on_click=reset_conversation)
 # Sidebar status: show which model is currently selected, then render an image
 # fetched from a remote URL.
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.image("https://www.hmgaihub.com/untitled.png")
-# Function to load model
 def load_model(selected_model_name):
     """Load the selected checkpoint in 4-bit, attach a LoRA adapter, and return it with a tokenizer.

     Args:
         selected_model_name: Key looked up in the module-level ``model_links``
             mapping; the mapped value is the checkpoint passed to
             ``AutoModelForCausalLM.from_pretrained``.

     Returns:
         A ``(model, tokenizer)`` tuple. ``model`` is the result of
         ``get_peft_model``; ``tokenizer`` is always loaded from
         ``mistralai/Mistral-7B-Instruct-v0.2``, regardless of the selection.
     """
     model_name = model_links[selected_model_name]

     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
         # Allows modules that end up on the CPU to stay in fp32 when the
         # model does not fit entirely on the GPU.
         llm_int8_enable_fp32_cpu_offload=True,
     )

     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         quantization_config=bnb_config,
         torch_dtype=torch.bfloat16,
         # The previous hand-written map used 'encoder'/'decoder' keys, which
         # do not name modules of a causal LM and therefore cannot cover the
         # model. "auto" lets Accelerate compute a GPU-first placement that
         # spills to the CPU, which is what the offload flag above is for.
         device_map="auto",
         trust_remote_code=True,
     )
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)

     peft_config = LoraConfig(
         lora_alpha=16,
         lora_dropout=0.1,
         r=64,
         bias="none",
         task_type="CAUSAL_LM",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
     )
     model = get_peft_model(model, peft_config)

     # The tokenizer comes from the base model, not from the selected checkpoint.
     base_model = "mistralai/Mistral-7B-Instruct-v0.2"
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
     return model, tokenizer
 # Load the model and tokenizer for the currently selected model.
 # NOTE(review): this call runs at module level; if Streamlit re-executes the
 # script on each interaction, the model is reloaded every time — consider
 # caching the loader (e.g. st.cache_resource) once the Streamlit version is confirmed.
 model, tokenizer = load_model(selected_model)

 # Sidebar status: show which model is currently selected, then render an image
 # fetched from a remote URL.
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.image("https://www.hmgaihub.com/untitled.png")
 def load_model(selected_model_name):
     """Load the selected checkpoint, attach a LoRA adapter, and return it with a tokenizer.

     Args:
         selected_model_name: Key looked up in the module-level ``model_links``
             mapping; the mapped value is the checkpoint passed to
             ``AutoModelForCausalLM.from_pretrained``.

     Returns:
         A ``(model, tokenizer)`` tuple. ``model`` is the result of
         ``get_peft_model``; ``tokenizer`` is always loaded from
         ``mistralai/Mistral-7B-Instruct-v0.2``, regardless of the selection.
     """
     model_name = model_links[selected_model_name]

     use_cuda = torch.cuda.is_available()
     device = "cuda" if use_cuda else "cpu"

     load_kwargs = {
         "device_map": {"": device},  # Place every module on the one chosen device.
         "trust_remote_code": True,
     }
     if use_cuda:
         # Only request 4-bit quantization when a GPU is present: bitsandbytes
         # 4-bit loading has required CUDA, so passing this config on the CPU
         # fallback made from_pretrained fail instead of actually falling back.
         load_kwargs["quantization_config"] = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.bfloat16,
             bnb_4bit_use_double_quant=False,
             llm_int8_enable_fp32_cpu_offload=True,
         )

     model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)

     peft_config = LoraConfig(
         lora_alpha=16,
         lora_dropout=0.1,
         r=64,
         bias="none",
         task_type="CAUSAL_LM",
         target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
     )
     model = get_peft_model(model, peft_config)

     tokenizer = AutoTokenizer.from_pretrained(
         "mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
     )
     return model, tokenizer
 # Load the model and tokenizer for the currently selected model.
 # NOTE(review): this call runs at module level; if Streamlit re-executes the
 # script on each interaction, the model is reloaded every time — consider
 # caching the loader (e.g. st.cache_resource) once the Streamlit version is confirmed.
 model, tokenizer = load_model(selected_model)