pablorocg committed on
Commit
2bd2ae7
·
verified ·
1 Parent(s): ff770dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -384,15 +384,13 @@ else:
384
  index = faiss.read_index('./storage/faiss_index.faiss')
385
 
386
  # Load the model
387
- quantization_config = BitsAndBytesConfig(
388
- load_in_4bit=True,
389
- bnb_4bit_use_double_quant=True,
390
- bnb_4bit_quant_type="nf4",
391
- bnb_4bit_compute_dtype=torch.bfloat16
392
- )
393
 
394
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
395
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config=quantization_config, torch_dtype=torch.float16, low_cpu_mem_usage=True, token=HF_TOKEN)
396
 
397
 
398
  def make_inference(query, hist):
 
384
  index = faiss.read_index('./storage/faiss_index.faiss')
385
 
386
  # Load the model
387
+ nf4_config = BitsAndBytesConfig(
388
+ load_in_4bit=True,
389
+ bnb_4bit_quant_type="nf4",
390
+ )
 
 
391
 
392
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
393
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config = nf4_config, token=HF_TOKEN)
394
 
395
 
396
  def make_inference(query, hist):