Upload files with `vila-upload`.
Browse files
Upload mm_utils.py
Upload siglip_encoder.py
- mm_utils.py +1 -1
- siglip_encoder.py +6 -2
mm_utils.py
CHANGED
@@ -26,7 +26,7 @@ import torch
|
|
26 |
from PIL import Image
|
27 |
from transformers import StoppingCriteria
|
28 |
|
29 |
-
from
|
30 |
|
31 |
|
32 |
def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
|
|
|
26 |
from PIL import Image
|
27 |
from transformers import StoppingCriteria
|
28 |
|
29 |
+
from .constants import DEFAULT_IMAGE_TOKEN
|
30 |
|
31 |
|
32 |
def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
|
siglip_encoder.py
CHANGED
@@ -19,12 +19,16 @@ import torch.nn as nn
|
|
19 |
import torch.nn.functional as F
|
20 |
from accelerate.hooks import add_hook_to_module
|
21 |
from einops import rearrange
|
22 |
-
|
23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
24 |
from transformers.image_processing_utils import BaseImageProcessor
|
25 |
-
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
26 |
from transformers.models.siglip import SiglipVisionModel
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
class VisionTower(nn.Module):
|
30 |
def __init__(self, vision_tower, args, delay_load=False):
|
|
|
19 |
import torch.nn.functional as F
|
20 |
from accelerate.hooks import add_hook_to_module
|
21 |
from einops import rearrange
|
22 |
+
|
23 |
from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
|
24 |
from transformers.image_processing_utils import BaseImageProcessor
|
|
|
25 |
from transformers.models.siglip import SiglipVisionModel
|
26 |
|
27 |
+
from s2wrapper import forward as multiscale_forward
|
28 |
+
|
29 |
+
# from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
30 |
+
def is_deepspeed_zero3_enabled():
|
31 |
+
return False
|
32 |
|
33 |
class VisionTower(nn.Module):
|
34 |
def __init__(self, vision_tower, args, delay_load=False):
|