Catherine ZHOU committed
Commit 57ffe1d · 1 Parent(s): d6526de

add dropdown option to image

Files changed (1)
  1. app.py +32 -20
app.py CHANGED
@@ -12,6 +12,13 @@ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 
+examples = [[("Dog in the beach"), 2],
+            [("Paris during night."), 1],
+            [("A cute kangaroo"), 5],
+            [("Dois cachorros"), 2],
+            [("un homme marchant sur le parc"), 3],
+            [("et høyt fjell"), 2]]
+
 #Open the precomputed embeddings
 emb_filename = 'unsplash-25k-photos-embeddings.pkl'
 with open(emb_filename, 'rb') as fIn:
@@ -20,12 +27,13 @@ with open(emb_filename, 'rb') as fIn:
 #print(f'img_names: {print(img_names)}')
 
 
-def search_text(query, top_k=1):
+def search_text(query, top_k=1, top_rel_image=1):
     """" Search an image based on the text query.
 
     Args:
         query ([string]): [query you want search for]
         top_k (int, optional): [Amount of images o return]. Defaults to 1.
+        top_rel_image (int, optional): [Relevance label of the image]. Defaults to 1
 
     Returns:
         [list]: [list of images that are related to the query.]
@@ -33,38 +41,42 @@ def search_text(query, top_k=1):
     # First, we encode the query.
     inputs = tokenizer([query], padding=True, return_tensors="pt")
     query_emb = model.get_text_features(**inputs)
-
+
     # Then, we use the util.semantic_search function, which computes the cosine-similarity
     # between the query embedding and all image embeddings.
     # It then returns the top_k highest ranked images, which we output
     hits = util.semantic_search(query_emb, img_emb, top_k=top_k)[0]
-
-    image=[]
+
+    image = []
     for hit in hits:
         #print(img_names[hit['corpus_id']])
-        object = Image.open(os.path.join("photos/", img_names[hit['corpus_id']]))
+        object = Image.open(os.path.join(
+            "photos/", img_names[hit['corpus_id']]))
         image.append(object)
         #print(f'array length is: {len(image)}')
 
-    return image
+    ret_indx = int(top_rel_image)
+    if ret_indx > top_k:
+        raise IndexError("given relevance image label is out of range")
+    else:
+        return image[ret_indx-1]
 
 
 iface = gr.Interface(
     title = "Text to Image using CLIP Model 📸",
-    description = "Gradio Demo fo CLIP model. \n This demo is based on assessment for the 🤗 Huggingface course 2. \n To use it, simply write which image you are looking for. Read more at the links below.",
+    description = "My version of the Gradio Demo fo CLIP model. \n This demo is based on assessment for the 🤗 Huggingface course 2. \n To use it, simply write which image you are looking for. Read more at the links below.",
     article = "You find more information about this demo on my ✨ github repository [marcelcastrobr](https://github.com/marcelcastrobr/huggingface_course2)",
     fn=search_text,
-    inputs=[gr.Textbox(lines=4,
-                       label="Write what you are looking for in an image...",
-                       placeholder="Text Here..."),
-            gr.Slider(0, 5, step=1)],
-    outputs=[gr.Gallery(
-        label="Generated images", show_label=False, elem_id="gallery"
-    ).style(grid=[2], height="auto")]
-    ,examples=[[("Dog in the beach"), 2],
-               [("Paris during night."), 1],
-               [("A cute kangaroo"), 5],
-               [("Dois cachorros"), 2],
-               [("un homme marchant sur le parc"), 3],
-               [("et høyt fjell"), 2]]
+    inputs=[
+        gr.Textbox(lines=4,
+                   label="Write what you are looking for in an image...",
+                   placeholder="Text Here..."),
+        gr.Slider(0, 5, step=1),
+        gr.Dropdown(list(range(0, 6)), multiselect=False,
+                    label="Relevance Image Label")
+    ],
+    outputs=[gr.Image(
+        label="Generated images", show_label=False, elem_id="output image"
+    ).style(height="auto", width="auto")]
+    ,examples=examples
 ).launch(debug=True)
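
Below is a minimal sketch of the selection logic this commit introduces, isolated from the CLIP model, the Unsplash embeddings, and the photos/ folder so it runs on its own. The names pick_relevant and fake_hits are illustrative only and are not part of app.py; in the Space the same check lives inside search_text and operates on the PIL images collected from the semantic-search hits.

# Illustrative sketch, not code from app.py: reproduces the ret_indx guard
# that search_text now applies after collecting its top_k hits.
def pick_relevant(results, top_k=1, top_rel_image=1):
    """Return the result ranked `top_rel_image` (1-based) among `top_k` hits."""
    ret_indx = int(top_rel_image)
    if ret_indx > top_k:
        raise IndexError("given relevance image label is out of range")
    return results[ret_indx - 1]

# Dummy stand-ins for the PIL images that search_text appends from photos/.
fake_hits = ["photo_rank_1.jpg", "photo_rank_2.jpg", "photo_rank_3.jpg"]

print(pick_relevant(fake_hits, top_k=3, top_rel_image=2))  # photo_rank_2.jpg

try:
    pick_relevant(fake_hits, top_k=3, top_rel_image=5)     # label exceeds top_k
except IndexError as err:
    print(err)                                             # upper-bound guard fires

In the Gradio interface, the Textbox, Slider, and Dropdown map positionally to query, top_k, and top_rel_image. Both the Slider and the Dropdown start at 0, and the guard only checks the upper bound, so a label of 0 would fall through to image[-1], i.e. the last of the collected hits.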