import torch
import clip


class CLIPImageEncoder:
    """Wraps OpenAI CLIP (ViT-B/16) to embed PIL images as feature vectors.

    Loads the model once at construction; ``encode_image`` then produces a
    single 1-D feature vector per image.
    """

    def __init__(self, device="cpu"):
        """Load the CLIP ViT-B/16 model and its preprocessing transform.

        Args:
            device: Torch device string the model runs on (e.g. "cpu",
                "cuda"). Defaults to "cpu".

        Note:
            ``clip.load`` downloads the model weights on first use, so
            construction may perform network and disk I/O.
        """
        self.device = device
        # preprocess is the torchvision-style transform matched to this model.
        self.model, self.preprocess = clip.load("ViT-B/16", device=device)

    def encode_image(self, image_pil):
        """Encode a single PIL image into a CLIP feature vector.

        Args:
            image_pil: A PIL.Image instance (RGB expected by CLIP's
                preprocessing — TODO confirm callers always pass RGB).

        Returns:
            A 1-D numpy array of image features (512 floats for ViT-B/16),
            copied to host memory.
        """
        print("Encoding image with CLIP")
        # Inference only — disable autograd to avoid building a graph.
        with torch.no_grad():
            # Preprocess to a tensor, add a batch dim, move to the model's device.
            image_preprocessed = self.preprocess(image_pil).unsqueeze(0).to(self.device)
            image_features = self.model.encode_image(image_preprocessed)
        # Drop the batch dimension and return as a host-side numpy array.
        return image_features.cpu().numpy()[0]