Commit · 6d93dbe
1 Parent(s): 83942d1
Update src/CLIP.py

src/CLIP.py CHANGED (+30 -1)
@@ -3,18 +3,47 @@ import torch
 
 
 class CLIPImageEncoder:
+    """
+    A class for encoding images using the CLIP model.
+
+    Args:
+        device (str): The device to run the model on (default: "cpu").
+
+    Attributes:
+        device (str): The device to run the model on.
+        model (CLIPModel): The CLIP model used for image encoding.
+        processor (AutoProcessor): The tokenizer and input processor for the CLIP model.
+    """
     def __init__(self, device="cpu"):
         self.device = device
         self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
         self.processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
-
+
     def encode_image(self, image_pil):
+        """
+        Encodes a single image using the CLIP model.
+
+        Args:
+            image_pil: A PIL Image object representing the image to encode.
+
+        Returns:
+            numpy.ndarray: The CLIP embedding for the image.
+        """
         with torch.no_grad():
             input = self.processor(images=image_pil, return_tensors="pt")
             image_features = self.model.get_image_features(**input)
             return image_features.cpu().detach().numpy()[0]
 
     def encode_images(self, batch):
+        """
+        Encodes a batch of images using the CLIP model.
+
+        Args:
+            batch (Dict[str, Any]): A dictionary containing the batch of images to encode.
+
+        Returns:
+            Dict[str, Any]: A dictionary containing the CLIP embeddings for the batch of images.
+        """
         images = batch["image"]
         input = self.processor(images=images, return_tensors="pt")
         with torch.no_grad():
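As a quick illustration of the newly documented API, a single image can be embedded with encode_image roughly as follows. This is a hypothetical sketch, not part of the commit: the module path src.CLIP and the file name example.jpg are assumptions, and Pillow must be installed.

# Hypothetical usage sketch (not part of this commit).
# Assumes CLIPImageEncoder is importable from src/CLIP.py and Pillow is installed.
from PIL import Image
from src.CLIP import CLIPImageEncoder

encoder = CLIPImageEncoder()              # downloads openai/clip-vit-base-patch32 on first use
image = Image.open("example.jpg")         # placeholder file name
embedding = encoder.encode_image(image)   # numpy.ndarray, shape (512,) for this checkpoint
print(embedding.shape)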
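The encode_images docstring describes a dictionary batch in and a dictionary of embeddings out, which is the shape datasets.map supplies and expects in batched mode. A rough sketch under that assumption; the "beans" dataset is a placeholder with an "image" column, and the exact key of the returned embeddings dictionary is not visible in this hunk.

# Hypothetical batched-encoding sketch (not part of this commit).
# Assumes the `datasets` library and a dataset exposing an "image" column.
from datasets import load_dataset
from src.CLIP import CLIPImageEncoder

encoder = CLIPImageEncoder()
ds = load_dataset("beans", split="train")                        # placeholder dataset
ds = ds.map(encoder.encode_images, batched=True, batch_size=32)  # adds the embedding column(s) returned by encode_images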