Ligeng-Zhu committed on
Commit
2182763
·
verified ·
1 Parent(s): fa0bd64

Upload files with `vila-upload`.

Browse files

Upload mm_utils.py
Upload siglip_encoder.py

Files changed (2) hide show
  1. mm_utils.py +1 -1
  2. siglip_encoder.py +6 -2
mm_utils.py CHANGED
@@ -26,7 +26,7 @@ import torch
26
  from PIL import Image
27
  from transformers import StoppingCriteria
28
 
29
- from llava.constants import DEFAULT_IMAGE_TOKEN
30
 
31
 
32
  def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
 
26
  from PIL import Image
27
  from transformers import StoppingCriteria
28
 
29
+ from .constants import DEFAULT_IMAGE_TOKEN
30
 
31
 
32
  def get_frame_from_vcap(vidcap, num_frames=10, max_fps=0.0, fps=None, frame_count=None, video_file_name=None):
siglip_encoder.py CHANGED
@@ -19,12 +19,16 @@ import torch.nn as nn
19
  import torch.nn.functional as F
20
  from accelerate.hooks import add_hook_to_module
21
  from einops import rearrange
22
- from s2wrapper import forward as multiscale_forward
23
  from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
24
  from transformers.image_processing_utils import BaseImageProcessor
25
- from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
26
  from transformers.models.siglip import SiglipVisionModel
27
 
 
 
 
 
 
28
 
29
  class VisionTower(nn.Module):
30
  def __init__(self, vision_tower, args, delay_load=False):
 
19
  import torch.nn.functional as F
20
  from accelerate.hooks import add_hook_to_module
21
  from einops import rearrange
22
+
23
  from transformers import AutoConfig, PretrainedConfig, PreTrainedModel, SiglipImageProcessor
24
  from transformers.image_processing_utils import BaseImageProcessor
 
25
  from transformers.models.siglip import SiglipVisionModel
26
 
27
+ from s2wrapper import forward as multiscale_forward
28
+
29
+ # from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
30
+ def is_deepspeed_zero3_enabled():
31
+ return False
32
 
33
  class VisionTower(nn.Module):
34
  def __init__(self, vision_tower, args, delay_load=False):