pablorocg committed on
Commit
2bd2ae7
·
verified ·
1 Parent(s): ff770dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -384,15 +384,13 @@ else:
384
  index = faiss.read_index('./storage/faiss_index.faiss')
385
 
386
  # Load the model
387
- quantization_config = BitsAndBytesConfig(
388
- load_in_4bit=True,
389
- bnb_4bit_use_double_quant=True,
390
- bnb_4bit_quant_type="nf4",
391
- bnb_4bit_compute_dtype=torch.bfloat16
392
- )
393
 
394
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
395
- model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config=quantization_config, torch_dtype=torch.float16, low_cpu_mem_usage=True, token=HF_TOKEN)
396
 
397
 
398
  def make_inference(query, hist):
 
384
  index = faiss.read_index('./storage/faiss_index.faiss')
385
 
386
  # Load the model
387
+ nf4_config = BitsAndBytesConfig(
388
+ load_in_4bit=True,
389
+ bnb_4bit_quant_type="nf4",
390
+ )
 
 
391
 
392
  tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
393
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", quantization_config = nf4_config, token=HF_TOKEN)
394
 
395
 
396
  def make_inference(query, hist):