Tao Wu committed on
Commit
94129fe
·
1 Parent(s): 28a9b71
Files changed (1) hide show
  1. app/embedding_setup.py +1 -6
app/embedding_setup.py CHANGED
@@ -34,11 +34,6 @@ retriever = db.as_retriever(search_kwargs={"k": TOP_K})
34
  lora_weights_rec = REC_LORA_MODEL
35
  lora_weights_exp = EXP_LORA_MODEL
36
  hf_auth = os.environ.get("hf_token")
37
- quantization_config = BitsAndBytesConfig(
38
- load_in_4bit=True,
39
- bnb_4bit_compute_dtype=torch.float16,
40
- bnb_4bit_quant_type="nf4"
41
- )
42
 
43
  tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
44
 
@@ -50,7 +45,7 @@ first_id = tokenizer.convert_tokens_to_ids(first_token)
50
  second_id = tokenizer.convert_tokens_to_ids(second_token)
51
  model = AutoModelForCausalLM.from_pretrained(
52
  LLM_MODEL,
53
- quantization_config=quantization_config,
54
  torch_dtype=torch.float16,
55
  device_map="auto",
56
  token=hf_auth,
 
34
  lora_weights_rec = REC_LORA_MODEL
35
  lora_weights_exp = EXP_LORA_MODEL
36
  hf_auth = os.environ.get("hf_token")
 
 
 
 
 
37
 
38
  tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
39
 
 
45
  second_id = tokenizer.convert_tokens_to_ids(second_token)
46
  model = AutoModelForCausalLM.from_pretrained(
47
  LLM_MODEL,
48
+ load_in_8bit=True,
49
  torch_dtype=torch.float16,
50
  device_map="auto",
51
  token=hf_auth,