suvadityamuk committed on
Commit
c82953b
·
1 Parent(s): d6cf17e

Signed-off-by: Suvaditya Mukherjee <[email protected]>

Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -7,7 +7,7 @@ import pymupdf
7
  import gradio as gr
8
  from qdrant_client import QdrantClient
9
  from utils import download_pdf_from_gdrive, merge_strings_with_prefix
10
- from transformers import AutoModelForCausalLM, AutoTokenizer
11
 
12
  def rag_query(query: str):
13
  """
@@ -150,7 +150,7 @@ if __name__ == "__main__":
150
  model_name,
151
  torch_dtype=torch.bfloat16,
152
  device_map="auto",
153
- load_in_4bit=True,
154
  ).to_bettertransformer().to('cuda')
155
 
156
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
7
  import gradio as gr
8
  from qdrant_client import QdrantClient
9
  from utils import download_pdf_from_gdrive, merge_strings_with_prefix
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
11
 
12
  def rag_query(query: str):
13
  """
 
150
  model_name,
151
  torch_dtype=torch.bfloat16,
152
  device_map="auto",
153
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
154
  ).to_bettertransformer().to('cuda')
155
 
156
  tokenizer = AutoTokenizer.from_pretrained(model_name)