CLIP-image-search

Runtime error

App Files Files Community

Catherine ZHOU committed on Apr 4, 2023

Commit

57ffe1d

1 Parent(s): d6526de

add dropdown option to image

Browse files

Files changed (1) hide show

app.py +32 -20

app.py CHANGED Viewed

@@ -12,6 +12,13 @@ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 #Open the precomputed embeddings
 emb_filename = 'unsplash-25k-photos-embeddings.pkl'
 with open(emb_filename, 'rb') as fIn:
@@ -20,12 +27,13 @@ with open(emb_filename, 'rb') as fIn:
         #print(f'img_names: {print(img_names)}')
-def search_text(query, top_k=1):
     """" Search an image based on the text query.
     Args:
         query ([string]): [query you want search for]
         top_k (int, optional): [Amount of images o return]. Defaults to 1.
     Returns:
         [list]: [list of images that are related to the query.]
@@ -33,38 +41,42 @@ def search_text(query, top_k=1):
     # First, we encode the query.
     inputs = tokenizer([query],  padding=True, return_tensors="pt")
     query_emb = model.get_text_features(**inputs)
     # Then, we use the util.semantic_search function, which computes the cosine-similarity
     # between the query embedding and all image embeddings.
     # It then returns the top_k highest ranked images, which we output
     hits = util.semantic_search(query_emb, img_emb, top_k=top_k)[0]
-    image=[]
     for hit in hits:
         #print(img_names[hit['corpus_id']])
-        object = Image.open(os.path.join("photos/", img_names[hit['corpus_id']]))
         image.append(object)
         #print(f'array length is: {len(image)}')
-    return image
 iface = gr.Interface(
     title = "Text to Image using CLIP Model 📸",
-    description = "Gradio Demo fo CLIP model. \n This demo is based on assessment for the 🤗  Huggingface course 2. \n To use it, simply write which image you are looking for. Read more at the links below.",
     article = "You find more information about this demo on my ✨ github repository [marcelcastrobr](https://github.com/marcelcastrobr/huggingface_course2)",
     fn=search_text,
-    inputs=[gr.Textbox(lines=4,
-        label="Write what you are looking for in an image...",
-        placeholder="Text Here..."),
-        gr.Slider(0, 5, step=1)],
-    outputs=[gr.Gallery(
-            label="Generated images", show_label=False, elem_id="gallery"
-        ).style(grid=[2], height="auto")]
-    ,examples=[[("Dog in the beach"), 2],
-        [("Paris during night."), 1],
-        [("A cute kangaroo"), 5],
-        [("Dois cachorros"), 2],
-        [("un homme marchant sur le parc"), 3],
-        [("et høyt fjell"), 2]]
     ).launch(debug=True)

 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+examples = [[("Dog in the beach"), 2],
+            [("Paris during night."), 1],
+            [("A cute kangaroo"), 5],
+            [("Dois cachorros"), 2],
+            [("un homme marchant sur le parc"), 3],
+            [("et høyt fjell"), 2]]
 #Open the precomputed embeddings
 emb_filename = 'unsplash-25k-photos-embeddings.pkl'
 with open(emb_filename, 'rb') as fIn:
         #print(f'img_names: {print(img_names)}')
+def search_text(query, top_k=1, top_rel_image=1):
     """" Search an image based on the text query.
     Args:
         query ([string]): [query you want search for]
         top_k (int, optional): [Amount of images o return]. Defaults to 1.
+        top_rel_image (int, optional): [Relevance label of the image]. Defaults to 1
     Returns:
         [list]: [list of images that are related to the query.]
     # First, we encode the query.
     inputs = tokenizer([query],  padding=True, return_tensors="pt")
     query_emb = model.get_text_features(**inputs)
     # Then, we use the util.semantic_search function, which computes the cosine-similarity
     # between the query embedding and all image embeddings.
     # It then returns the top_k highest ranked images, which we output
     hits = util.semantic_search(query_emb, img_emb, top_k=top_k)[0]
+    image = []
     for hit in hits:
         #print(img_names[hit['corpus_id']])
+        object = Image.open(os.path.join(
+            "photos/", img_names[hit['corpus_id']]))
         image.append(object)
         #print(f'array length is: {len(image)}')
+    ret_indx = int(top_rel_image)
+    if ret_indx > top_k:
+        raise IndexError("given relevance image label is out of range")
+    else:
+        return image[ret_indx-1]
 iface = gr.Interface(
     title = "Text to Image using CLIP Model 📸",
+    description = "My version of the Gradio Demo fo CLIP model. \n This demo is based on assessment for the 🤗  Huggingface course 2. \n To use it, simply write which image you are looking for. Read more at the links below.",
     article = "You find more information about this demo on my ✨ github repository [marcelcastrobr](https://github.com/marcelcastrobr/huggingface_course2)",
     fn=search_text,
+    inputs=[
+        gr.Textbox(lines=4,
+            label="Write what you are looking for in an image...",
+            placeholder="Text Here..."),
+        gr.Slider(0, 5, step=1),
+        gr.Dropdown(list(range(0, 6)), multiselect=False,
+                    label="Relevance Image Label")
+    ],
+    outputs=[gr.Image(
+            label="Generated images", show_label=False, elem_id="output image"
+        ).style(height="auto", width="auto")]
+    ,examples=examples
     ).launch(debug=True)