Harsh2001 committed on
Commit
a92113b
·
verified ·
1 Parent(s): 0df5f26

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +8 -0
utils.py CHANGED
@@ -18,6 +18,13 @@ model_id = 'meta-llama/Llama-2-7b-chat-hf'
18
 
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
 
 
 
 
 
 
 
 
21
  model_config = transformers.AutoConfig.from_pretrained(
22
  model_id,
23
  use_auth_token=hf_auth
@@ -27,6 +34,7 @@ llm_model = transformers.AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  trust_remote_code=True,
29
  config=model_config,
 
30
  device_map='auto',
31
  use_auth_token=hf_auth
32
  )
 
18
 
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
 
21
+ bnb_config = transformers.BitsAndBytesConfig(
22
+ load_in_4bit=True,
23
+ bnb_4bit_quant_type='nf4',
24
+ bnb_4bit_use_double_quant=True,
25
+ bnb_4bit_compute_dtype=bfloat16
26
+ )
27
+
28
  model_config = transformers.AutoConfig.from_pretrained(
29
  model_id,
30
  use_auth_token=hf_auth
 
34
  model_id,
35
  trust_remote_code=True,
36
  config=model_config,
37
+ quantization_config=bnb_config,
38
  device_map='auto',
39
  use_auth_token=hf_auth
40
  )