Upload files with `vila-upload`.
Browse files
- siglip_encoder.py +6 -2
siglip_encoder.py
CHANGED
@@ -19,12 +19,16 @@ import torch.nn as nn
|
|
19 |
import torch.nn.functional as F
|
20 |
from accelerate.hooks import add_hook_to_module
|
21 |
from einops import rearrange
|
22 |
-
|
23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
24 |
from transformers.image_processing_utils import BaseImageProcessor
|
25 |
-
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
26 |
from transformers.models.siglip import SiglipVisionModel
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
class VisionTower(nn.Module):
|
30 |
def __init__(self, vision_tower, args, delay_load=False):
|
|
|
19 |
import torch.nn.functional as F
|
20 |
from accelerate.hooks import add_hook_to_module
|
21 |
from einops import rearrange
|
22 |
+
|
23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
24 |
from transformers.image_processing_utils import BaseImageProcessor
|
|
|
25 |
from transformers.models.siglip import SiglipVisionModel
|
26 |
|
27 |
+
from s2wrapper import forward as multiscale_forward
|
28 |
+
|
29 |
+
# from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
30 |
+
def is_deepspeed_zero3_enabled() -> bool:
    """Report whether DeepSpeed ZeRO-3 is enabled; this stub always says no.

    Local stand-in that shadows the helper of the same name from
    ``transformers.integrations.deepspeed`` (that import is commented out
    in this file). It takes no arguments and performs no detection.

    Returns:
        Always ``False``.
    """
    # NOTE(review): presumably stubbed so this module loads without the
    # DeepSpeed integration being importable — confirm before relying on
    # this file in a ZeRO-3 training run, where the real check matters.
    return False
|
32 |
|
33 |
class VisionTower(nn.Module):
|
34 |
def __init__(self, vision_tower, args, delay_load=False):
|