Spaces:
Running
on
Zero
Running
on
Zero
import os | |
from .clip_encoder import CLIPVisionTower, CLIPVisionTowerS2 | |
from .siglip_encoder import SigLipVisionTower | |
# from .eva_clip.eva_clip_encoder import EvaClipVisionTower | |
# from .dev_eva_clip.eva_vit import EvaViTWrapper | |
def build_vision_tower(vision_tower_cfg, **kwargs):
    """Build the vision encoder selected by ``vision_tower_cfg``.

    The tower identifier is read from ``vision_tower_cfg.mm_vision_tower``;
    if that attribute does not exist, ``vision_tower_cfg.vision_tower`` is
    used instead.

    Dispatch rules, checked in order:

    * identifier starts with ``"openai"`` or ``"laion"``, or contains
      ``"ShareGPT4V"``: ``CLIPVisionTowerS2`` when ``vision_tower_cfg.s2``
      is truthy, otherwise ``CLIPVisionTower``;
    * identifier contains ``"siglip"`` (case-insensitive) or
      ``"open_clip_pytorch_model.bin"``: ``SigLipVisionTower``.

    Args:
        vision_tower_cfg: Configuration object exposing the attributes
            described above.
        **kwargs: Forwarded unchanged to the selected tower's constructor.

    Returns:
        The constructed vision tower instance.

    Raises:
        ValueError: If no string identifier is configured, or if the
            identifier matches none of the dispatch rules.
    """
    vision_tower = getattr(
        vision_tower_cfg,
        "mm_vision_tower",
        getattr(vision_tower_cfg, "vision_tower", None),
    )

    # Fail with the same error as an unrecognised name when the identifier
    # is missing or not a string, rather than crashing on a str method.
    if not isinstance(vision_tower, str):
        raise ValueError(f"Unknown vision tower: {vision_tower}")

    use_s2 = getattr(vision_tower_cfg, "s2", False)

    # NOTE(review): an earlier form of this condition also accepted any
    # identifier that exists as a filesystem path. That check is disabled
    # here, presumably so local checkpoints are not unconditionally routed
    # to the CLIP towers -- confirm before re-enabling.
    if vision_tower.startswith(("openai", "laion")) or "ShareGPT4V" in vision_tower:
        if use_s2:
            return CLIPVisionTowerS2(vision_tower, args=vision_tower_cfg, **kwargs)
        return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)

    if (
        "siglip" in vision_tower.lower()
        or "open_clip_pytorch_model.bin" in vision_tower
    ):
        return SigLipVisionTower(
            vision_tower, vision_tower_cfg=vision_tower_cfg, **kwargs
        )

    raise ValueError(f"Unknown vision tower: {vision_tower}")