himel06 committed on
Commit 6f95d6d · verified · 1 Parent(s): d9b0e2e

Update BanglaRAG/bangla_rag_pipeline.py

Files changed (1):
  1. BanglaRAG/bangla_rag_pipeline.py +18 -61
BanglaRAG/bangla_rag_pipeline.py CHANGED
@@ -13,100 +13,57 @@ from langchain_community.vectorstores import Chroma
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import warnings
 
 warnings.filterwarnings("ignore")
 
 class BanglaRAGChain:
     def __init__(self):
-        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.chat_model_id = None
-        self.embed_model_id = None
-        self.k = 4
-        self.max_new_tokens = 1024
-        self.chunk_size = 500
-        self.chunk_overlap = 150
-        self.text_path = ""
-        self.quantization = None
-        self.temperature = 0.9
-        self.top_p = 0.6
-        self.top_k = 50
-        self._text_content = None
-        self.hf_token = None
-
-        self.tokenizer = None
-        self.chat_model = None
-        self._llm = None
-        self._retriever = None
-        self._db = None
-        self._documents = []
-        self._chain = None
-
-    def load(
-        self,
-        chat_model_id,
-        embed_model_id,
-        text_path,
-        quantization,
-        k=4,
-        top_k=2,
-        top_p=0.6,
-        max_new_tokens=1024,
-        temperature=0.6,
-        chunk_size=500,
-        chunk_overlap=150,
-        hf_token=None,
-    ):
+        # Initialization code...
+        pass
+
+    def load(self, chat_model_id, embed_model_id, text_path, k, top_k, top_p, temperature, chunk_size, chunk_overlap, hf_token, max_new_tokens, quantization, offload_dir=None):
         self.chat_model_id = chat_model_id
         self.embed_model_id = embed_model_id
+        self.text_path = text_path
         self.k = k
         self.top_k = top_k
         self.top_p = top_p
         self.temperature = temperature
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
-        self.text_path = text_path
-        self.quantization = quantization
-        self.max_new_tokens = max_new_tokens
         self.hf_token = hf_token
+        self.max_new_tokens = max_new_tokens
+        self.quantization = quantization
+        self.offload_dir = offload_dir  # New parameter
 
-        if self.hf_token is not None:
-            os.environ["HF_TOKEN"] = str(self.hf_token)
-
+        # Load models
         self._load_models()
-        self._create_document()
-        self._update_chroma_db()
-        self._get_retriever()
-        self._get_llm()
-        self._create_chain()
 
     def _load_models(self):
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.chat_model_id)
-            bnb_config = None
             if self.quantization:
-                bnb_config = BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_use_double_quant=True,
-                    bnb_4bit_quant_type="nf4",
-                    bnb_4bit_compute_dtype=torch.float16,
-                )
                 self.chat_model = AutoModelForCausalLM.from_pretrained(
                     self.chat_model_id,
-                    load_in_8bit=True,
-                    torch_dtype=torch.bfloat16,
+                    torch_dtype="auto",
                     device_map="auto",
-                    quantization_config=bnb_config,
+                    load_in_4bit=True,
+                    offload_folder=self.offload_dir,  # Offload here
                 )
             else:
                 self.chat_model = AutoModelForCausalLM.from_pretrained(
                     self.chat_model_id,
-                    torch_dtype=torch.bfloat16,
                     device_map="auto",
+                    offload_folder=self.offload_dir,  # Offload here
                 )
+
+            self.tokenizer = AutoTokenizer.from_pretrained(self.chat_model_id)
+
         except Exception as e:
             raise RuntimeError(f"Error loading chat model: {e}")
 
+
     def _create_document(self):
         try:
             with open(self.text_path, "r", encoding="utf-8") as file:
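Two practical notes on the updated API: every load() parameter except offload_dir is now required (the old keyword defaults are gone), and after this commit load() only calls _load_models(), so the document creation, Chroma DB, retriever, and chain setup steps are no longer triggered. A hedged usage sketch follows; every model id, path, and value is illustrative rather than taken from the repository (the numeric values mirror the defaults the old __init__ used).

from BanglaRAG.bangla_rag_pipeline import BanglaRAGChain

rag = BanglaRAGChain()
rag.load(
    chat_model_id="some-org/bangla-chat-model",  # placeholder id
    embed_model_id="some-org/bangla-embedder",   # placeholder id
    text_path="data/corpus.txt",                 # placeholder path
    k=4,
    top_k=50,
    top_p=0.6,
    temperature=0.9,
    chunk_size=500,
    chunk_overlap=150,
    hf_token=None,
    max_new_tokens=1024,
    quantization=True,         # take the 4-bit load path
    offload_dir="./offload",   # new parameter introduced by this commit
)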