Spaces:
Sleeping
Sleeping
Commit
·
c82953b
1
Parent(s):
d6cf17e
changes
Browse files
Signed-off-by: Suvaditya Mukherjee <[email protected]>
app.py
CHANGED
@@ -7,7 +7,7 @@ import pymupdf
|
|
7 |
import gradio as gr
|
8 |
from qdrant_client import QdrantClient
|
9 |
from utils import download_pdf_from_gdrive, merge_strings_with_prefix
|
10 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
11 |
|
12 |
def rag_query(query: str):
|
13 |
"""
|
@@ -150,7 +150,7 @@ if __name__ == "__main__":
|
|
150 |
model_name,
|
151 |
torch_dtype=torch.bfloat16,
|
152 |
device_map="auto",
|
153 |
-
|
154 |
).to_bettertransformer().to('cuda')
|
155 |
|
156 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
7 |
import gradio as gr
|
8 |
from qdrant_client import QdrantClient
|
9 |
from utils import download_pdf_from_gdrive, merge_strings_with_prefix
|
10 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
11 |
|
12 |
def rag_query(query: str):
|
13 |
"""
|
|
|
150 |
model_name,
|
151 |
torch_dtype=torch.bfloat16,
|
152 |
device_map="auto",
|
153 |
+
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
154 |
).to_bettertransformer().to('cuda')
|
155 |
|
156 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|