BenkHel committed on
Commit
98866e7
·
verified ·
1 Parent(s): 0990d31

Update cumo/model/multimodal_encoder/clip_encoder.py

Browse files
cumo/model/multimodal_encoder/clip_encoder.py CHANGED
@@ -86,7 +86,8 @@ class CLIPVisionTower(nn.Module):
86
  for image in images:
87
  dev = image.device if hasattr(image, "device") else torch.device("cuda" if torch.cuda.is_available() else "cpu")
88
  dt = image.dtype if hasattr(image, "dtype") else torch.float16
89
- image_forward_out = self.vision_model(image.to(device=dev, dtype=dt).unsqueeze(0))
 
90
  image_feature = self.feature_select(image_forward_out).to(image.dtype)
91
  image_features.append(image_feature)
92
 
 
86
  for image in images:
87
  dev = image.device if hasattr(image, "device") else torch.device("cuda" if torch.cuda.is_available() else "cpu")
88
  dt = image.dtype if hasattr(image, "dtype") else torch.float16
89
+ print("Image shape before vision_model:", image.shape)
90
+ image_forward_out = self.vision_model(image.to(device=dev, dtype=dt))
91
  image_feature = self.feature_select(image_forward_out).to(image.dtype)
92
  image_features.append(image_feature)
93