cnzzx committed on
Commit 7b85afa
1 Parent(s): f16f78e
Files changed (1)
  models/vsa_model.py +13 -30
models/vsa_model.py CHANGED
@@ -41,7 +41,8 @@ from lmdeploy.messages import PytorchEngineConfig
 from typing import List, Union
 
 SEARCH_MODEL_NAMES = {
-    'internlm2_5-7b-chat': 'internlm2'
+    'internlm2_5-7b-chat': 'internlm2',
+    'internlm2_5-1_8b-chat': 'internlm2'
 }
 
 
@@ -125,7 +126,7 @@ class VLM:
         load_8bit: bool = False,
         load_4bit: bool = True,
         temperature: float = 0.2,
-        max_new_tokens: int = 2000,
+        max_new_tokens: int = 1024,
     ):
         disable_torch_init()
         model_name = get_model_name_from_path(model_path)
@@ -325,6 +326,16 @@ class VisionSearchAssistant:
         self.searcher = WebSearcher(
             model_path = self.search_model
         )
+        self.grounder = VisualGrounder(
+            model_path = self.ground_model,
+            device = self.ground_device,
+        )
+        self.vlm = VLM(
+            model_path = self.vlm_model,
+            device = self.vlm_device,
+            load_4bit = self.vlm_load_4bit,
+            load_8bit = self.vlm_load_8bit
+        )
 
     def app_run(
         self,
@@ -352,10 +363,6 @@ class VisionSearchAssistant:
             raise Exception('Unsupported input image format.')
 
         # Visual Grounding
-        self.grounder = VisualGrounder(
-            model_path = self.ground_model,
-            device = self.ground_device,
-        )
         bboxes, labels, out_image = self.grounder(in_image, classes = ground_classes)
         yield out_image, 'ground'
 
@@ -370,17 +377,7 @@ class VisionSearchAssistant:
                 det_images.append(in_image)
                 labels.append('image')
 
-        del self.grounder
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-
         # Visual Captioning
-        self.vlm = VLM(
-            model_path = self.vlm_model,
-            device = self.vlm_device,
-            load_4bit = self.vlm_load_4bit,
-            load_8bit = self.vlm_load_8bit
-        )
         captions = []
         for det_image, label in zip(det_images, labels):
             inp = get_caption_prompt(label, text)
@@ -414,21 +411,11 @@ class VisionSearchAssistant:
 
         queries = [text + " " + query for query in queries]
 
-        del self.vlm
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-
         # Web Searching
         contexts = self.searcher(queries)
         yield contexts, 'search'
 
         # QA
-        self.vlm = VLM(
-            model_path = self.vlm_model,
-            device = self.vlm_device,
-            load_4bit = self.vlm_load_4bit,
-            load_8bit = self.vlm_load_8bit
-        )
         TOKEN_LIMIT = 3500
         max_length_per_context = TOKEN_LIMIT // len(contexts)
         for cid, context in enumerate(contexts):
@@ -442,7 +429,3 @@ class VisionSearchAssistant:
             print(answer)
 
         yield answer, 'answer'
-
-        del self.vlm
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
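
In effect, this commit hoists the VisualGrounder and VLM construction out of app_run and into the constructor, next to the WebSearcher, and drops the per-stage del / torch.cuda.empty_cache() / torch.cuda.synchronize() teardown between pipeline stages. A minimal sketch of the trade-off follows; HeavyModel is a hypothetical stand-in for the real model classes, not part of this repo:

import torch

class HeavyModel:
    """Hypothetical stand-in for VisualGrounder / VLM."""
    def __init__(self):
        self.weights = torch.zeros(1)  # pretend this loads large GPU weights

    def __call__(self, x):
        return x

# Old pattern (before this commit): load, use, and free each model per stage.
def run_stage(x):
    model = HeavyModel()               # reload cost paid on every call
    out = model(x)
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()       # return cached blocks to the allocator
        torch.cuda.synchronize()       # wait for pending kernels to finish
    return out

# New pattern (after this commit): construct once, keep resident, reuse.
class Assistant:
    def __init__(self):
        self.model = HeavyModel()      # loaded once at startup

    def app_run(self, x):
        return self.model(x)           # no reload latency on repeated runs

Keeping all the models resident raises peak GPU memory, since the grounder, VLM, and searcher now coexist, but it removes the reload latency on every app_run call; the smaller internlm2_5-1_8b-chat search model registered in the same commit, and the reduced max_new_tokens, plausibly help offset that larger footprint.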