Spaces:
Paused
Paused
Tao Wu
committed on
Commit
·
94129fe
1
Parent(s):
28a9b71
8bit
Browse files
- app/embedding_setup.py +1 -6
app/embedding_setup.py
CHANGED
@@ -34,11 +34,6 @@ retriever = db.as_retriever(search_kwargs={"k": TOP_K})
|
|
34 |
lora_weights_rec = REC_LORA_MODEL
|
35 |
lora_weights_exp = EXP_LORA_MODEL
|
36 |
hf_auth = os.environ.get("hf_token")
|
37 |
-
quantization_config = BitsAndBytesConfig(
|
38 |
-
load_in_4bit=True,
|
39 |
-
bnb_4bit_compute_dtype=torch.float16,
|
40 |
-
bnb_4bit_quant_type="nf4"
|
41 |
-
)
|
42 |
|
43 |
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
|
44 |
|
@@ -50,7 +45,7 @@ first_id = tokenizer.convert_tokens_to_ids(first_token)
|
|
50 |
second_id = tokenizer.convert_tokens_to_ids(second_token)
|
51 |
model = AutoModelForCausalLM.from_pretrained(
|
52 |
LLM_MODEL,
|
53 |
-
|
54 |
torch_dtype=torch.float16,
|
55 |
device_map="auto",
|
56 |
token=hf_auth,
|
|
|
34 |
lora_weights_rec = REC_LORA_MODEL
|
35 |
lora_weights_exp = EXP_LORA_MODEL
|
36 |
hf_auth = os.environ.get("hf_token")
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
|
39 |
|
|
|
45 |
second_id = tokenizer.convert_tokens_to_ids(second_token)
|
46 |
model = AutoModelForCausalLM.from_pretrained(
|
47 |
LLM_MODEL,
|
48 |
+
load_in_8bit=True,
|
49 |
torch_dtype=torch.float16,
|
50 |
device_map="auto",
|
51 |
token=hf_auth,
|