drmasad committed
Commit e659be2 · verified · 1 Parent(s): 8666754

Update app.py

Files changed (1): app.py +31 -9
app.py CHANGED
@@ -37,30 +37,52 @@ st.sidebar.button("Reset Chat", on_click=reset_conversation)
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
 st.sidebar.image("https://www.hmgaihub.com/untitled.png")
 
-# Function to load model
 def load_model(selected_model_name):
     model_name = model_links[selected_model_name]
-    base_model = "mistralai/Mistral-7B-Instruct-v0.2"
+
+    # Set a specific device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    # Load model with device mapping
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
-        llm_int8_enable_fp32_cpu_offload=True
+        llm_int8_enable_fp32_cpu_offload=True,
     )
-    device_map = {'encoder': 'cuda', 'decoder': 'cpu'}
+
+    device_map = {"": device}  # Default device for all components
+
+    # Load model with proper device mapping
     model = AutoModelForCausalLM.from_pretrained(
-        model_name, quantization_config=bnb_config, torch_dtype=torch.bfloat16,
-        device_map=device_map, trust_remote_code=True
+        model_name,
+        quantization_config=bnb_config,
+        device_map=device_map,  # Assign device
+        trust_remote_code=True,
     )
+
     model.config.use_cache = False
     model = prepare_model_for_kbit_training(model)
-    peft_config = LoraConfig(lora_alpha=16, lora_dropout=0.1, r=64, bias="none", task_type="CAUSAL_LM",
-                             target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"])
+
+    peft_config = LoraConfig(
+        lora_alpha=16,
+        lora_dropout=0.1,
+        r=64,
+        bias="none",
+        task_type="CAUSAL_LM",
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
+    )
+
     model = get_peft_model(model, peft_config)
-    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        "mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
+    )
+
     return model, tokenizer
 
+
 # Load model and tokenizer
 model, tokenizer = load_model(selected_model)
 
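
The substantive fix is the device_map: the old {'encoder': 'cuda', 'decoder': 'cpu'} names submodules that a decoder-only Mistral model does not have, so weight placement fails, whereas {"": device} addresses the root module and pins the entire model to a single device. A minimal standalone sketch of that load pattern, assuming torch, transformers, and bitsandbytes are installed (the model id is copied from the diff; this is not the full app):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

device = "cuda" if torch.cuda.is_available() else "cpu"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,          # quantize weights to 4 bits at load time
    bnb_4bit_quant_type="nf4",  # NormalFloat4 quantization, as in the diff
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    quantization_config=bnb_config,
    device_map={"": device},  # "" targets the root module: whole model on one device
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")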
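
The LoRA wrapping itself is only reformatted by the commit, not changed. A sketch of that step, assuming peft is installed and model is the quantized model from the snippet above; hyperparameters are copied from the diff, and the final print is an added sanity check rather than part of the commit:

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model.config.use_cache = False                  # KV cache is not useful during training
model = prepare_model_for_kbit_training(model)  # cast norm/output layers for stable k-bit training

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # confirms only the adapter weights are trainable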