tulsi0897 committed
Commit a5e6371 · 1 Parent(s): 40f17af

adding data and files
README.md CHANGED
@@ -1,13 +1,38 @@
 ---
-title: Clip Image Search
-emoji: 🚀
-colorFrom: red
-colorTo: yellow
+title: Clip Demo
+emoji: 👁
+colorFrom: indigo
+colorTo: blue
 sdk: streamlit
-sdk_version: 1.27.1
+sdk_version: 1.2.0
 app_file: app.py
 pinned: false
-license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Configuration
+
+`title`: _string_
+Display title for the Space
+
+`emoji`: _string_
+Space emoji (emoji-only character allowed)
+
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+
+`sdk_version` : _string_
+Only applicable for `streamlit` SDK.
+See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+
+`pinned`: _boolean_
+Whether the Space stays on top of your list.
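For reference, a complete front matter built from the keys documented above might look like the following sketch; the values here are illustrative, not this Space's actual settings:

```yaml
---
title: My Demo        # display title for the Space
emoji: 🚀             # single emoji shown with the title
colorFrom: indigo     # thumbnail gradient start color
colorTo: blue         # thumbnail gradient end color
sdk: streamlit        # gradio or streamlit
sdk_version: 1.2.0    # only used with the streamlit SDK
app_file: app.py      # main application file, relative to the repository root
pinned: false         # whether the Space stays on top of your list
---
```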
app.py ADDED
@@ -0,0 +1,236 @@
+from html import escape
+import re
+import streamlit as st
+import pandas as pd, numpy as np
+import torch
+from transformers import CLIPProcessor, CLIPModel
+from st_clickable_images import clickable_images
+
+MODEL_NAMES = [
+    # "base-patch32",
+    # "base-patch16",
+    # "large-patch14",
+    "large-patch14-336"
+]
+
+
+@st.cache(allow_output_mutation=True)
+def load():
+    df = {0: pd.read_csv("data.csv"), 1: pd.read_csv("data2.csv")}
+    models = {}
+    processors = {}
+    embeddings = {}
+    for name in MODEL_NAMES:
+        models[name] = CLIPModel.from_pretrained(f"openai/clip-vit-{name}").eval()
+        processors[name] = CLIPProcessor.from_pretrained(f"openai/clip-vit-{name}")
+        embeddings[name] = {
+            0: np.load(f"embeddings-vit-{name}.npy"),
+            1: np.load(f"embeddings2-vit-{name}.npy"),
+        }
+        for k in [0, 1]:
+            embeddings[name][k] = embeddings[name][k] / np.linalg.norm(
+                embeddings[name][k], axis=1, keepdims=True
+            )
+    return models, processors, df, embeddings
+
+
+models, processors, df, embeddings = load()
+source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}
+
+
+def compute_text_embeddings(list_of_strings, name):
+    inputs = processors[name](text=list_of_strings, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        result = models[name].get_text_features(**inputs).detach().numpy()
+    return result / np.linalg.norm(result, axis=1, keepdims=True)
+
+
+def image_search(query, corpus, name, n_results=24):
+    positive_embeddings = None
+
+    def concatenate_embeddings(e1, e2):
+        if e1 is None:
+            return e2
+        else:
+            return np.concatenate((e1, e2), axis=0)
+
+    splitted_query = query.split("EXCLUDING ")
+    dot_product = 0
+    k = 0 if corpus == "Unsplash" else 1
+    if len(splitted_query[0]) > 0:
+        positive_queries = splitted_query[0].split(";")
+        for positive_query in positive_queries:
+            match = re.match(r"\[(Movies|Unsplash):(\d{1,5})\](.*)", positive_query)
+            if match:
+                corpus2, idx, remainder = match.groups()
+                idx, remainder = int(idx), remainder.strip()
+                k2 = 0 if corpus2 == "Unsplash" else 1
+                positive_embeddings = concatenate_embeddings(
+                    positive_embeddings, embeddings[name][k2][idx : idx + 1, :]
+                )
+                if len(remainder) > 0:
+                    positive_embeddings = concatenate_embeddings(
+                        positive_embeddings, compute_text_embeddings([remainder], name)
+                    )
+            else:
+                positive_embeddings = concatenate_embeddings(
+                    positive_embeddings, compute_text_embeddings([positive_query], name)
+                )
+        dot_product = embeddings[name][k] @ positive_embeddings.T
+        dot_product = dot_product - np.median(dot_product, axis=0)
+        dot_product = dot_product / np.max(dot_product, axis=0, keepdims=True)
+        dot_product = np.min(dot_product, axis=1)
+
+    if len(splitted_query) > 1:
+        negative_queries = (" ".join(splitted_query[1:])).split(";")
+        negative_embeddings = compute_text_embeddings(negative_queries, name)
+        dot_product2 = embeddings[name][k] @ negative_embeddings.T
+        dot_product2 = dot_product2 - np.median(dot_product2, axis=0)
+        dot_product2 = dot_product2 / np.max(dot_product2, axis=0, keepdims=True)
+        dot_product -= np.max(np.maximum(dot_product2, 0), axis=1)
+
+    results = np.argsort(dot_product)[-1 : -n_results - 1 : -1]
+    return [
+        (
+            df[k].iloc[i]["path"],
+            df[k].iloc[i]["tooltip"] + source[k],
+            i,
+        )
+        for i in results
+    ]
+
+
+description = """
+# Semantic image search
+
+**Enter your query and hit enter**
+
+*Built with OpenAI's [CLIP](https://openai.com/blog/clip/) model, 🤗 Hugging Face's [transformers library](https://huggingface.co/transformers/), [Streamlit](https://streamlit.io/), 25k images from [Unsplash](https://unsplash.com/) and 8k images from [The Movie Database (TMDB)](https://www.themoviedb.org/)*
+
+*Inspired by [Unsplash Image Search](https://github.com/haltakov/natural-language-image-search) from Vladimir Haltakov and [Alph, The Sacred River](https://github.com/thoppe/alph-the-sacred-river) from Travis Hoppe*
+"""
+
+howto = """
+- Click on an image to use it as a query and find similar images
+- Several queries, including one based on an image, can be combined (use "**;**" as a separator)
+- If the input includes "**EXCLUDING**", the part right of it will be used as a negative query
+"""
+
+div_style = {
+    "display": "flex",
+    "justify-content": "center",
+    "flex-wrap": "wrap",
+}
+
+
+def main():
+    st.markdown(
+        """
+<style>
+.block-container{
+    max-width: 1200px;
+}
+div.row-widget.stRadio > div{
+    flex-direction:row;
+    display: flex;
+    justify-content: center;
+}
+div.row-widget.stRadio > div > label{
+    margin-left: 5px;
+    margin-right: 5px;
+}
+.row-widget {
+    margin-top: -25px;
+}
+section>div:first-child {
+    padding-top: 30px;
+}
+div.reportview-container > section:first-child{
+    max-width: 320px;
+}
+#MainMenu {
+    visibility: hidden;
+}
+footer {
+    visibility: hidden;
+}
+</style>""",
+        unsafe_allow_html=True,
+    )
+    st.sidebar.markdown(description)
+    with st.sidebar.expander("Advanced use"):
+        st.markdown(howto)
+    # mode = st.sidebar.selectbox(
+    #     "", ["Results for ViT-L/14@336px", "Comparison of 2 models"], index=0
+    # )
+
+    _, c, _ = st.columns((1, 3, 1))
+    if "query" in st.session_state:
+        query = c.text_input("", value=st.session_state["query"])
+    else:
+        query = c.text_input("", value="clouds at sunset")
+    corpus = st.radio("", ["Unsplash", "Movies"])
+
+    models_dict = {
+        "ViT-B/32 (quicker)": "base-patch32",
+        "ViT-B/16 (average)": "base-patch16",
+        # "ViT-L/14 (slow)": "large-patch14",
+        "ViT-L/14@336px (slower)": "large-patch14-336",
+    }
+
+    if False:  # "Comparison" in mode:
+        c1, c2 = st.columns((1, 1))
+        selection1 = c1.selectbox("", models_dict.keys(), index=0)
+        selection2 = c2.selectbox("", models_dict.keys(), index=2)
+        name1 = models_dict[selection1]
+        name2 = models_dict[selection2]
+    else:
+        name1 = MODEL_NAMES[-1]
+
+    if len(query) > 0:
+        results1 = image_search(query, corpus, name1)
+        if False:  # "Comparison" in mode:
+            with c1:
+                clicked1 = clickable_images(
+                    [result[0] for result in results1],
+                    titles=[result[1] for result in results1],
+                    div_style=div_style,
+                    img_style={"margin": "2px", "height": "150px"},
+                    key=query + corpus + name1 + "1",
+                )
+            results2 = image_search(query, corpus, name2)
+            with c2:
+                clicked2 = clickable_images(
+                    [result[0] for result in results2],
+                    titles=[result[1] for result in results2],
+                    div_style=div_style,
+                    img_style={"margin": "2px", "height": "150px"},
+                    key=query + corpus + name2 + "2",
+                )
+        else:
+            clicked1 = clickable_images(
+                [result[0] for result in results1],
+                titles=[result[1] for result in results1],
+                div_style=div_style,
+                img_style={"margin": "2px", "height": "200px"},
+                key=query + corpus + name1 + "1",
+            )
+            clicked2 = -1
+
+        if clicked2 >= 0 or clicked1 >= 0:
+            change_query = False
+            if "last_clicked" not in st.session_state:
+                change_query = True
+            else:
+                if max(clicked2, clicked1) != st.session_state["last_clicked"]:
+                    change_query = True
+            if change_query:
+                if clicked1 >= 0:
+                    st.session_state["query"] = f"[{corpus}:{results1[clicked1][2]}]"
+                # elif clicked2 >= 0:
+                #     st.session_state["query"] = f"[{corpus}:{results2[clicked2][2]}]"
+                st.experimental_rerun()
+
+
+if __name__ == "__main__":
+    main()
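The query grammar described in the `howto` string and the scoring done in `image_search` can be hard to follow inside the diff, so here is a minimal, self-contained sketch of the same ranking idea. The random matrices and the `fake_text_embeddings` helper are stand-ins for the real CLIP embeddings and `compute_text_embeddings`; only the arithmetic mirrors the app.

```python
# Sketch of the ranking in image_search(): L2-normalized image and text embeddings
# are compared with a dot product (cosine similarity), scores are median-centered
# and rescaled per query, combined across positive queries with a min, penalized
# by negative queries, and the top indices are returned.
import numpy as np

rng = np.random.default_rng(0)
image_embeddings = rng.normal(size=(100, 512)).astype(np.float32)  # stand-in for embeddings-vit-*.npy
image_embeddings /= np.linalg.norm(image_embeddings, axis=1, keepdims=True)


def fake_text_embeddings(queries):
    # Stand-in for compute_text_embeddings(); the real app calls CLIP here.
    e = rng.normal(size=(len(queries), 512)).astype(np.float32)
    return e / np.linalg.norm(e, axis=1, keepdims=True)


def rank(positive_queries, negative_queries=(), n_results=5):
    pos = fake_text_embeddings(positive_queries)
    scores = image_embeddings @ pos.T                         # cosine similarity per positive query
    scores = scores - np.median(scores, axis=0)               # center each query's scores
    scores = scores / np.max(scores, axis=0, keepdims=True)   # rescale to a common range
    scores = np.min(scores, axis=1)                           # an image must match all positive queries
    if negative_queries:
        neg = fake_text_embeddings(list(negative_queries))
        neg_scores = image_embeddings @ neg.T
        neg_scores = neg_scores - np.median(neg_scores, axis=0)
        neg_scores = neg_scores / np.max(neg_scores, axis=0, keepdims=True)
        scores -= np.max(np.maximum(neg_scores, 0), axis=1)   # subtract the strongest negative match
    return np.argsort(scores)[-1 : -n_results - 1 : -1]       # best images first


# Query strings the app accepts, per the "Advanced use" notes:
#   "clouds at sunset"            -> one positive text query
#   "a red car; at night"         -> two positive queries combined with ";"
#   "beach EXCLUDING people"      -> the part after EXCLUDING is a negative query
#   "[Unsplash:1234] in winter"   -> image 1234 of the Unsplash corpus as query, plus extra text
print(rank(["a red car", "at night"], negative_queries=["people"]))
```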
data.csv ADDED
The diff for this file is too large to render. See raw diff
 
data2.csv ADDED
The diff for this file is too large to render. See raw diff
 
data3.csv ADDED
The diff for this file is too large to render. See raw diff
 
embeddings-vit-base-patch16.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:125430e11a4a415ec0c0fc5339f97544f0447e4b0a24c20f2e59f8852e706afc
+size 51200128
embeddings-vit-base-patch32.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f7ebdff24079665faf58d07045056a63b5499753e3ffbda479691d53de3ab38
+size 51200128
embeddings-vit-large-patch14-336.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f79f10ebe267b4ee7acd553dfe0ee31df846123630058a6d58c04bf22e0ad068
+size 76800128
embeddings-vit-large-patch14.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64515f7d3d71137e2944f2c3d72c8df3e684b5d6a6ff7dcebb92370f7326ccfd
+size 76800128
embeddings2-vit-base-patch16.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:153cf3fae2385d51fe8729d3a1c059f611ca47a3fc501049708114d1bbf79049
+size 16732288
embeddings2-vit-base-patch32.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7d545bed86121dac1cedcc1de61ea5295f5840c1eb751637e6628ac54faef81
+size 16732288
embeddings2-vit-large-patch14-336.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e66eb377465fbfaa56cec079aa3e214533ceac43646f2ca78028ae4d8ad6d03
+size 25098368
embeddings2-vit-large-patch14.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d730b33e758c2648419a96ac86d39516c59795e613c35700d3a64079e5a9a27
+size 25098368
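The Git LFS pointer sizes above are consistent with the image counts mentioned in the app's description string, assuming each `.npy` file stores a float32 matrix with the usual 128-byte NumPy header: 25,000 Unsplash vectors and 8,170 TMDB vectors, at 512 dimensions for the ViT-B models and 768 for ViT-L. A quick check:

```python
# Rows implied by each LFS size, assuming float32 data and a 128-byte .npy header.
for size, dim in [(51200128, 512), (76800128, 768),    # Unsplash embeddings
                  (16732288, 512), (25098368, 768)]:   # TMDB embeddings
    print((size - 128) / (4 * dim))  # -> 25000.0, 25000.0, 8170.0, 8170.0
```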
requirements.txt ADDED
@@ -0,0 +1,7 @@
+torch==2.0.1
+transformers==4.21.1
+ftfy==6.1.1
+numpy==1.21.5
+pandas==1.3.5
+st-clickable-images==0.0.3
+altair<5