Commit d85d411
Parent(s): db03f5d

Add functions to generate text and image embeddings

Add function to generate embeddings for text and image data
Update Gradio interface to include hidden base64 encoded image input
Update Gradio interface to include hidden base64 encoded image embedding output

app.py CHANGED
@@ -1,10 +1,10 @@
 import gradio as gr
 from numpy import empty
 import open_clip
-from regex import F
 import torch
-import
-import
+import PIL.Image as Image
+from io import BytesIO
+import base64

 # Set device to GPU if available
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -20,25 +20,20 @@ model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(
 )


-def generate_embedding(text_data, image_data):
+# Define function to generate text embeddings
+def generate_text_embedding(text_data):
     """
-    Generate embeddings for text
+    Generate embeddings for text data using the OpenCLIP model.

     Parameters
     ----------
     text_data : str or tuple of str
         Text data to embed.
-    image_data : PIL.Image.Image or tuple of PIL.Image.Image
-        Image data to embed.

     Returns
     -------
     text_embeddings : list of str
         List of text embeddings.
-    image_embeddings : list of str
-        List of image embeddings.
-    similarity : list of str
-        List of cosine similarity between text and image embeddings.
     """

     # Embed text data
@@ -53,6 +48,10 @@ def generate_embedding(text_data, image_data):
     if isinstance(text_data, tuple):
         text_data = list(text_data)

+    # If text_data is not a list of strings, raise error
+    if not isinstance(text_data, list):
+        raise TypeError("text_data must be a string or a tuple of strings.")
+
     # Keep track of indices of empty text strings
     empty_data_indices = [i for i, text in enumerate(text_data) if text == ""]

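
The guard added here rounds out a pattern app.py uses for text, images, and the base64 path alike: note the indices of empty inputs, embed only the non-empty ones, then reinsert placeholders so outputs stay aligned with inputs. A minimal self-contained sketch of that pattern, with len standing in for the real embedding call:

    def embed_with_placeholders(items, embed_fn):
        # Remember where the empty entries were
        empty_idx = [i for i, item in enumerate(items) if item == ""]
        # Embed only the non-empty entries
        results = [embed_fn(item) for item in items if item != ""]
        # Reinsert placeholders so output indices match input indices
        for i in empty_idx:
            results.insert(i, "")
        return results

    print(embed_with_placeholders(["cat", "", "dog"], len))  # [3, '', 3]
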
@@ -74,12 +73,30 @@ def generate_embedding(text_data, image_data):
     for i in empty_data_indices:
         text_embeddings.insert(i, "")

+    return text_embeddings
+
+# Define function to generate image embeddings
+def generate_image_embedding(image_data):
+    """
+    Generate embeddings for image data using the OpenCLIP model.
+
+    Parameters
+    ----------
+    image_data : PIL.Image.Image or tuple of PIL.Image.Image
+        Image data to embed.
+
+    Returns
+    -------
+    image_embeddings : list of str
+        List of image embeddings.
+    """
+
     # Embed image data
     image_embeddings = []
     empty_data_indices = []
     if image_data:
         # If image_data is a single PIL image, convert to list of PIL images
-        if isinstance(image_data,
+        if isinstance(image_data, Image.Image):
             image_data = [image_data]

         # If image_data is a tuple of images, convert to list of images
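
The completed isinstance check above lets a single PIL image, a tuple, or a list all flow through one code path. A standalone sketch of that normalization (requires Pillow; the TypeError mirrors the guards app.py adds for its other inputs):

    import PIL.Image as Image

    def as_image_list(image_data):
        # A single PIL image becomes a one-element list
        if isinstance(image_data, Image.Image):
            image_data = [image_data]
        # A tuple of images becomes a list of images
        if isinstance(image_data, tuple):
            image_data = list(image_data)
        if not isinstance(image_data, list):
            raise TypeError("image_data must be a PIL image or a tuple of PIL images.")
        return image_data

    print(as_image_list(Image.new("RGB", (2, 2))))  # [<PIL.Image.Image ...>]
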
@@ -108,6 +125,41 @@ def generate_embedding(text_data, image_data):
     for i in empty_data_indices:
         image_embeddings.insert(i, "")

+    return image_embeddings
+
+
+# Define function to generate embeddings
+def generate_embedding(text_data, image_data, image_data_base64):
+    """
+    Generate embeddings for text and image data using the OpenCLIP model.
+
+    Parameters
+    ----------
+    text_data : str or tuple of str
+        Text data to embed.
+    image_data : PIL.Image.Image or tuple of PIL.Image.Image
+        Image data to embed.
+    image_data_base64 : str or tuple of str
+        Base64 encoded image data to embed.
+
+    Returns
+    -------
+    text_embeddings : list of str
+        List of text embeddings.
+    image_embeddings : list of str
+        List of image embeddings.
+    similarity : list of str
+        List of cosine similarity between text and image embeddings.
+    image_data_base64_embeddings : str or tuple of str
+        List of image embeddings for base64 encoded image data.
+    """
+
+    # Embed text data
+    text_embeddings = generate_text_embedding(text_data)
+
+    # Embed image data
+    image_embeddings = generate_image_embedding(image_data)
+
     # Calculate cosine similarity between text and image embeddings
     similarity = []
     empty_data_indices = []
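
The refactor reduces generate_embedding to two helper calls plus the similarity block, which this diff leaves unchanged. For readers without the full app.py, a typical OpenCLIP flow of the kind these helpers wrap looks like the following; the model and checkpoint names ("ViT-B-32", "laion2b_s34b_b79k") are placeholders, since the create_model_and_transforms call is truncated in the hunk header:

    import torch
    import open_clip
    import PIL.Image as Image

    model, _, preprocess = open_clip.create_model_and_transforms(
        "ViT-B-32", pretrained="laion2b_s34b_b79k"
    )
    tokenizer = open_clip.get_tokenizer("ViT-B-32")

    with torch.no_grad():
        text_features = model.encode_text(tokenizer(["a photo of a cat"]))
        image = preprocess(Image.new("RGB", (224, 224))).unsqueeze(0)
        image_features = model.encode_image(image)

    # Cosine similarity is the dot product of L2-normalized features
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    similarity = (text_features @ image_features.T).item()
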
@@ -141,7 +193,38 @@ def generate_embedding(text_data, image_data):
     for i in empty_data_indices:
         similarity.insert(i, "")

-
+    # Embed base64 encoded image data
+    decoded_image_data = []
+    if image_data_base64:
+        # If image_data_base64 is a string, convert to list of strings
+        if isinstance(image_data_base64, str):
+            image_data_base64 = [image_data_base64]
+
+        # If image_data_base64 is a tuple of strings, convert to list of strings
+        if isinstance(image_data_base64, tuple):
+            image_data_base64 = list(image_data_base64)
+
+        # If image_data_base64 is not a list of strings, raise error
+        if not isinstance(image_data_base64, list):
+            raise TypeError("image_data_base64 must be a string or a tuple of strings.")
+
+        # Keep track of indices of empty image strings
+        empty_data_indices = [i for i, img in enumerate(image_data_base64) if img == ""]
+
+        # Remove empty image strings
+        image_data_base64 = [img for img in image_data_base64 if img != ""]
+
+    if image_data_base64:
+        # Decode base64 encoded image data
+        decoded_image_data = [Image.open(BytesIO(base64.b64decode(img))) for img in image_data_base64]
+
+        # Insert empty strings at indices of empty image strings
+        for i in empty_data_indices:
+            decoded_image_data.insert(i, None)
+
+    image_data_base64_embeddings = generate_image_embedding(tuple(decoded_image_data))
+
+    return (text_embeddings, image_embeddings, similarity, image_data_base64_embeddings)


 # Define Gradio interface
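
Because the new path calls base64.b64decode(img) directly, it appears to expect a raw base64 string rather than a data: URL. A self-contained round trip showing what a caller would put into the hidden textbox and the decode app.py performs on the way in:

    import base64
    from io import BytesIO
    import PIL.Image as Image

    # Client side: serialize an image to a base64 PNG string
    buffer = BytesIO()
    Image.new("RGB", (8, 8), "red").save(buffer, format="PNG")
    b64_png = base64.b64encode(buffer.getvalue()).decode("ascii")

    # Server side: decode the string back into a PIL image
    decoded = Image.open(BytesIO(base64.b64decode(b64_png)))
    print(decoded.size)  # (8, 8)
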
@@ -149,12 +232,14 @@ demo = gr.Interface(
     fn=generate_embedding,
     inputs=[
         gr.Textbox(lines=5, max_lines=5, placeholder="Enter Text Here...", label="Text to Embed"),
-        gr.Image(height=512, type="pil", label="Image to Embed")
+        gr.Image(height=512, type="pil", label="Image to Embed"),
+        gr.Textbox(label="Base64 Encoded Image", visible=False)
     ],
     outputs=[
         gr.Textbox(lines=5, max_lines=5, label="Text Embedding", autoscroll=False),
         gr.Textbox(lines=5, max_lines=5, label="Image Embedding", autoscroll=False),
-        gr.Textbox(label="Cosine Similarity")
+        gr.Textbox(label="Cosine Similarity"),
+        gr.Textbox(label="Embedding of Base64 Encoded Images", visible=False)
     ],
     title="OpenCLIP Embedding Generator",
     description="Generate embeddings using OpenCLIP model for text and images.",
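
With the hidden components wired in, generate_embedding takes three inputs and returns four outputs in the order the interface lists them. A hypothetical direct call, with b64_png as built in the round-trip sketch above and the gr.Image slot left empty; whether the similarity block tolerates an empty image list depends on unchanged code not shown in this diff:

    text_emb, image_emb, similarity, b64_emb = generate_embedding(
        "a photo of a cat",  # text_data
        None,                # image_data (gr.Image input left empty)
        b64_png,             # image_data_base64 (hidden textbox)
    )
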