Spaces:

suvadityamuk
/

resume-rag

Sleeping

suvadityamuk committed on Dec 2, 2024

Commit

c82953b

1 Parent(s): d6cf17e

changes

Signed-off-by: Suvaditya Mukherjee <[email protected]>

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import pymupdf
 import gradio as gr
 from qdrant_client import QdrantClient
 from utils import download_pdf_from_gdrive, merge_strings_with_prefix
-from transformers import AutoModelForCausalLM, AutoTokenizer
 def rag_query(query: str):
     """
@@ -150,7 +150,7 @@ if __name__ == "__main__":
         model_name,
         torch_dtype=torch.bfloat16,
         device_map="auto",
-        load_in_4bit=True,
     ).to_bettertransformer().to('cuda')
     tokenizer = AutoTokenizer.from_pretrained(model_name)

 import gradio as gr
 from qdrant_client import QdrantClient
 from utils import download_pdf_from_gdrive, merge_strings_with_prefix
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 def rag_query(query: str):
     """
         model_name,
         torch_dtype=torch.bfloat16,
         device_map="auto",
+        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
     ).to_bettertransformer().to('cuda')
     tokenizer = AutoTokenizer.from_pretrained(model_name)