Spaces:
Running
Running
Update cumo/model/multimodal_encoder/clip_encoder.py
Browse files
cumo/model/multimodal_encoder/clip_encoder.py
CHANGED
@@ -86,7 +86,8 @@ class CLIPVisionTower(nn.Module):
             for image in images:
                 dev = image.device if hasattr(image, "device") else torch.device("cuda" if torch.cuda.is_available() else "cpu")
                 dt = image.dtype if hasattr(image, "dtype") else torch.float16
-
+                print("Image shape before vision_model:", image.shape)
+                image_forward_out = self.vision_model(image.to(device=dev, dtype=dt))
                 image_feature = self.feature_select(image_forward_out).to(image.dtype)
                 image_features.append(image_feature)
 