Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -384,15 +384,13 @@ else:
|
|
384 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
385 |
|
386 |
# Load the model
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
bnb_4bit_compute_dtype=torch.bfloat16
|
392 |
-
)
|
393 |
|
394 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
395 |
-
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config
|
396 |
|
397 |
|
398 |
def make_inference(query, hist):
|
|
|
384 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
385 |
|
386 |
# Load the model
|
387 |
+
nf4_config = BitsAndBytesConfig(
|
388 |
+
load_in_4bit=True,
|
389 |
+
bnb_4bit_quant_type="nf4",
|
390 |
+
)
|
|
|
|
|
391 |
|
392 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
393 |
+
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config = nf4_config, token=HF_TOKEN)
|
394 |
|
395 |
|
396 |
def make_inference(query, hist):
|