from transformers import CLIPVisionModel, CLIPImageProcessor, CLIPVisionConfig | |
from . import register_vision_tower | |
from .base import VisionTower | |
# NOTE(review): `register_vision_tower` is imported by this module but is not
# applied to this class in the visible code -- confirm whether the class is
# meant to be registered.
class CLIPVisionTower(VisionTower):
    """Vision tower that pairs a CLIP vision model with its image processor."""

    def __init__(self, cfg):
        """Create the CLIP vision model and load its image processor.

        Args:
            cfg: Configuration object. It is forwarded to the ``VisionTower``
                initializer and to ``CLIPVisionModel``, and its
                ``model_name_or_path`` attribute selects the image processor
                to load. Presumably a ``CLIPVisionConfig`` carrying that
                extra attribute -- TODO confirm against the caller.
        """
        super().__init__(cfg)

        # The model is built from the config object itself rather than
        # through ``from_pretrained``.
        clip_model = CLIPVisionModel(cfg)
        self._vision_tower = clip_model

        # The image processor is resolved from the name/path stored on cfg.
        processor = CLIPImageProcessor.from_pretrained(cfg.model_name_or_path)
        self._image_processor = processor