Vintern-3B-Demo

Running on Zero

qnguyen3 committed on Apr 8

Commit

e8306f2

•

1 Parent(s): 6c81b3d

Update modeling_llava_qwen2.py

Files changed (1) hide show

modeling_llava_qwen2.py CHANGED Viewed

@@ -662,14 +662,14 @@ class LlavaMetaForCausalLM(ABC):
         return self.get_model().get_vision_tower()
     def encode_images(self, images):
-        image_features = self.get_model().get_vision_tower()(images)
-        image_features = self.get_model().mm_projector(image_features.to('cuda'))
         return image_features
     def prepare_inputs_labels_for_multimodal(
             self, input_ids, position_ids, attention_mask, past_key_values, labels, images
     ):
-        vision_tower = self.get_vision_tower()
         if vision_tower is None or images is None or input_ids.shape[1] == 1:
             if past_key_values is not None and vision_tower is not None and images is not None and input_ids.shape[
                 1] == 1:

         return self.get_model().get_vision_tower()
     def encode_images(self, images):
+        image_features = self.get_model().get_vision_tower().cuda()(images)
+        image_features = self.get_model().mm_projector(image_features)
         return image_features
     def prepare_inputs_labels_for_multimodal(
             self, input_ids, position_ids, attention_mask, past_key_values, labels, images
     ):
+        vision_tower = self.get_vision_tower().cuda()
         if vision_tower is None or images is None or input_ids.shape[1] == 1:
             if past_key_values is not None and vision_tower is not None and images is not None and input_ids.shape[
                 1] == 1: