Advait3009 committed
Commit a558a96 · verified · 1 Parent(s): 2821092

Update utils/model_loader.py

Files changed (1)
  1. utils/model_loader.py +16 -11
utils/model_loader.py CHANGED
@@ -1,24 +1,26 @@
-from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
+from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
 import torch
 from typing import Optional
 
 def load_llava_model():
     """Load LLaVA model with 4-bit quantization for HF Spaces"""
     model_id = "llava-hf/llava-1.5-7b-hf"
 
+    quant_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4"
+    )
+
     return pipeline(
         "image-to-text",
         model=model_id,
+        tokenizer=model_id,
         device_map="auto",
         model_kwargs={
             "torch_dtype": torch.float16,
-            "load_in_4bit": True,
-            "quantization_config": {
-                "load_in_4bit": True,
-                "bnb_4bit_compute_dtype": torch.float16,
-                "bnb_4bit_use_double_quant": True,
-                "bnb_4bit_quant_type": "nf4"
-            }
+            "quantization_config": quant_config
         }
     )
 
@@ -34,16 +36,19 @@ def load_caption_model():
 
 def load_retrieval_models():
     """Load encoders with shared weights"""
+    from sentence_transformers import SentenceTransformer
+    from transformers import AutoModel
+
     models = {}
     models['text_encoder'] = SentenceTransformer(
         'sentence-transformers/all-MiniLM-L6-v2',
         device="cuda" if torch.cuda.is_available() else "cpu"
     )
-
+
     models['image_encoder'] = AutoModel.from_pretrained(
         "openai/clip-vit-base-patch32",
         device_map="auto",
         torch_dtype=torch.float16
     )
-
+
     return models
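
For context, a minimal sketch of how the updated loaders might be exercised after this change. The driver code, example image path, and prompt text below are illustrative assumptions and are not part of the commit; only load_llava_model() and load_retrieval_models() come from utils/model_loader.py.

# Hypothetical usage sketch; assumes a CUDA GPU and bitsandbytes installed,
# since the pipeline is now built with a 4-bit NF4 BitsAndBytesConfig.
from PIL import Image
from utils.model_loader import load_llava_model, load_retrieval_models

captioner = load_llava_model()  # image-to-text pipeline, 4-bit quantized

# Prompt follows the usual LLaVA-1.5 chat convention; the image path is a placeholder.
image = Image.open("sample.jpg")
out = captioner(
    image,
    prompt="USER: <image>\nDescribe this image. ASSISTANT:",
    generate_kwargs={"max_new_tokens": 64},
)
print(out[0]["generated_text"])

# Retrieval encoders: MiniLM text encoder + CLIP image encoder.
models = load_retrieval_models()
text_emb = models["text_encoder"].encode("a photo of a cat")
print(text_emb.shape)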