Tao Wu committed on
Commit
28a9b71
·
1 Parent(s): 032427b

quantization

Browse files
Files changed (1) hide show
  1. app/embedding_setup.py +2 -1
app/embedding_setup.py CHANGED
@@ -40,7 +40,7 @@ quantization_config = BitsAndBytesConfig(
40
  bnb_4bit_quant_type="nf4"
41
  )
42
 
43
- tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, quantization_config=quantization_config, token=hf_auth)
44
 
45
 
46
  first_token = 'First'
@@ -50,6 +50,7 @@ first_id = tokenizer.convert_tokens_to_ids(first_token)
50
  second_id = tokenizer.convert_tokens_to_ids(second_token)
51
  model = AutoModelForCausalLM.from_pretrained(
52
  LLM_MODEL,
 
53
  torch_dtype=torch.float16,
54
  device_map="auto",
55
  token=hf_auth,
 
40
  bnb_4bit_quant_type="nf4"
41
  )
42
 
43
+ tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
44
 
45
 
46
  first_token = 'First'
 
50
  second_id = tokenizer.convert_tokens_to_ids(second_token)
51
  model = AutoModelForCausalLM.from_pretrained(
52
  LLM_MODEL,
53
+ quantization_config=quantization_config,
54
  torch_dtype=torch.float16,
55
  device_map="auto",
56
  token=hf_auth,