adding data and files

- README.md +32 -7
- app.py +236 -0
- data.csv +0 -0
- data2.csv +0 -0
- data3.csv +0 -0
- embeddings-vit-base-patch16.npy +3 -0
- embeddings-vit-base-patch32.npy +3 -0
- embeddings-vit-large-patch14-336.npy +3 -0
- embeddings-vit-large-patch14.npy +3 -0
- embeddings2-vit-base-patch16.npy +3 -0
- embeddings2-vit-base-patch32.npy +3 -0
- embeddings2-vit-large-patch14-336.npy +3 -0
- embeddings2-vit-large-patch14.npy +3 -0
- requirements.txt +7 -0
README.md
CHANGED
@@ -1,13 +1,38 @@
 ---
-title: Clip
-emoji:
-colorFrom:
-colorTo:
+title: Clip Demo
+emoji: π
+colorFrom: indigo
+colorTo: blue
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.2.0
 app_file: app.py
 pinned: false
-license: mit
 ---
 
-
+# Configuration
+
+`title`: _string_
+Display title for the Space
+
+`emoji`: _string_
+Space emoji (emoji-only character allowed)
+
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+
+`sdk_version` : _string_
+Only applicable for `streamlit` SDK.
+See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+
+`pinned`: _boolean_
+Whether the Space stays on top of your list.
app.py
ADDED
@@ -0,0 +1,236 @@
from html import escape
import re
import streamlit as st
import pandas as pd, numpy as np
import torch
from transformers import CLIPProcessor, CLIPModel
from st_clickable_images import clickable_images

MODEL_NAMES = [
    # "base-patch32",
    # "base-patch16",
    # "large-patch14",
    "large-patch14-336"
]


@st.cache(allow_output_mutation=True)
def load():
    df = {0: pd.read_csv("data.csv"), 1: pd.read_csv("data2.csv")}
    models = {}
    processors = {}
    embeddings = {}
    for name in MODEL_NAMES:
        models[name] = CLIPModel.from_pretrained(f"openai/clip-vit-{name}").eval()
        processors[name] = CLIPProcessor.from_pretrained(f"openai/clip-vit-{name}")
        embeddings[name] = {
            0: np.load(f"embeddings-vit-{name}.npy"),
            1: np.load(f"embeddings2-vit-{name}.npy"),
        }
        for k in [0, 1]:
            embeddings[name][k] = embeddings[name][k] / np.linalg.norm(
                embeddings[name][k], axis=1, keepdims=True
            )
    return models, processors, df, embeddings


models, processors, df, embeddings = load()
source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}


def compute_text_embeddings(list_of_strings, name):
    inputs = processors[name](text=list_of_strings, return_tensors="pt", padding=True)
    with torch.no_grad():
        result = models[name].get_text_features(**inputs).detach().numpy()
    return result / np.linalg.norm(result, axis=1, keepdims=True)


def image_search(query, corpus, name, n_results=24):
    positive_embeddings = None

    def concatenate_embeddings(e1, e2):
        if e1 is None:
            return e2
        else:
            return np.concatenate((e1, e2), axis=0)

    splitted_query = query.split("EXCLUDING ")
    dot_product = 0
    k = 0 if corpus == "Unsplash" else 1
    if len(splitted_query[0]) > 0:
        positive_queries = splitted_query[0].split(";")
        for positive_query in positive_queries:
            match = re.match(r"\[(Movies|Unsplash):(\d{1,5})\](.*)", positive_query)
            if match:
                corpus2, idx, remainder = match.groups()
                idx, remainder = int(idx), remainder.strip()
                k2 = 0 if corpus2 == "Unsplash" else 1
                positive_embeddings = concatenate_embeddings(
                    positive_embeddings, embeddings[name][k2][idx : idx + 1, :]
                )
                if len(remainder) > 0:
                    positive_embeddings = concatenate_embeddings(
                        positive_embeddings, compute_text_embeddings([remainder], name)
                    )
            else:
                positive_embeddings = concatenate_embeddings(
                    positive_embeddings, compute_text_embeddings([positive_query], name)
                )
        dot_product = embeddings[name][k] @ positive_embeddings.T
        dot_product = dot_product - np.median(dot_product, axis=0)
        dot_product = dot_product / np.max(dot_product, axis=0, keepdims=True)
        dot_product = np.min(dot_product, axis=1)

    if len(splitted_query) > 1:
        negative_queries = (" ".join(splitted_query[1:])).split(";")
        negative_embeddings = compute_text_embeddings(negative_queries, name)
        dot_product2 = embeddings[name][k] @ negative_embeddings.T
        dot_product2 = dot_product2 - np.median(dot_product2, axis=0)
        dot_product2 = dot_product2 / np.max(dot_product2, axis=0, keepdims=True)
        dot_product -= np.max(np.maximum(dot_product2, 0), axis=1)

    results = np.argsort(dot_product)[-1 : -n_results - 1 : -1]
    return [
        (
            df[k].iloc[i]["path"],
            df[k].iloc[i]["tooltip"] + source[k],
            i,
        )
        for i in results
    ]


description = """
# Semantic image search

**Enter your query and hit enter**

*Built with OpenAI's [CLIP](https://openai.com/blog/clip/) model, 🤗 Hugging Face's [transformers library](https://huggingface.co/transformers/), [Streamlit](https://streamlit.io/), 25k images from [Unsplash](https://unsplash.com/) and 8k images from [The Movie Database (TMDB)](https://www.themoviedb.org/)*

*Inspired by [Unsplash Image Search](https://github.com/haltakov/natural-language-image-search) from Vladimir Haltakov and [Alph, The Sacred River](https://github.com/thoppe/alph-the-sacred-river) from Travis Hoppe*
"""

howto = """
- Click on an image to use it as a query and find similar images
- Several queries, including one based on an image, can be combined (use "**;**" as a separator)
- If the input includes "**EXCLUDING**", the part right of it will be used as a negative query
"""

div_style = {
    "display": "flex",
    "justify-content": "center",
    "flex-wrap": "wrap",
}


def main():
    st.markdown(
        """
<style>
.block-container{
    max-width: 1200px;
}
div.row-widget.stRadio > div{
    flex-direction:row;
    display: flex;
    justify-content: center;
}
div.row-widget.stRadio > div > label{
    margin-left: 5px;
    margin-right: 5px;
}
.row-widget {
    margin-top: -25px;
}
section>div:first-child {
    padding-top: 30px;
}
div.reportview-container > section:first-child{
    max-width: 320px;
}
#MainMenu {
    visibility: hidden;
}
footer {
    visibility: hidden;
}
</style>""",
        unsafe_allow_html=True,
    )
    st.sidebar.markdown(description)
    with st.sidebar.expander("Advanced use"):
        st.markdown(howto)
    # mode = st.sidebar.selectbox(
    #     "", ["Results for ViT-L/14@336px", "Comparison of 2 models"], index=0
    # )

    _, c, _ = st.columns((1, 3, 1))
    if "query" in st.session_state:
        query = c.text_input("", value=st.session_state["query"])
    else:
        query = c.text_input("", value="clouds at sunset")
    corpus = st.radio("", ["Unsplash", "Movies"])

    models_dict = {
        "ViT-B/32 (quicker)": "base-patch32",
        "ViT-B/16 (average)": "base-patch16",
        # "ViT-L/14 (slow)": "large-patch14",
        "ViT-L/14@336px (slower)": "large-patch14-336",
    }

    if False:  # "Comparison" in mode:
        c1, c2 = st.columns((1, 1))
        selection1 = c1.selectbox("", models_dict.keys(), index=0)
        selection2 = c2.selectbox("", models_dict.keys(), index=2)
        name1 = models_dict[selection1]
        name2 = models_dict[selection2]
    else:
        name1 = MODEL_NAMES[-1]

    if len(query) > 0:
        results1 = image_search(query, corpus, name1)
        if False:  # "Comparison" in mode:
            with c1:
                clicked1 = clickable_images(
                    [result[0] for result in results1],
                    titles=[result[1] for result in results1],
                    div_style=div_style,
                    img_style={"margin": "2px", "height": "150px"},
                    key=query + corpus + name1 + "1",
                )
            results2 = image_search(query, corpus, name2)
            with c2:
                clicked2 = clickable_images(
                    [result[0] for result in results2],
                    titles=[result[1] for result in results2],
                    div_style=div_style,
                    img_style={"margin": "2px", "height": "150px"},
                    key=query + corpus + name2 + "2",
                )
        else:
            clicked1 = clickable_images(
                [result[0] for result in results1],
                titles=[result[1] for result in results1],
                div_style=div_style,
                img_style={"margin": "2px", "height": "200px"},
                key=query + corpus + name1 + "1",
            )
            clicked2 = -1

        if clicked2 >= 0 or clicked1 >= 0:
            change_query = False
            if "last_clicked" not in st.session_state:
                change_query = True
            else:
                if max(clicked2, clicked1) != st.session_state["last_clicked"]:
                    change_query = True
            if change_query:
                if clicked1 >= 0:
                    st.session_state["query"] = f"[{corpus}:{results1[clicked1][2]}]"
                # elif clicked2 >= 0:
                #     st.session_state["query"] = f"[{corpus}:{results2[clicked2][2]}]"
                st.experimental_rerun()


if __name__ == "__main__":
    main()
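The query grammar parsed by `image_search` above is terse, so a few hypothetical inputs may help. The strings and the index `123` below are illustrative only and are not taken from the data files in this commit.

```python
# Hypothetical query strings for image_search() defined above (illustrative only).
example_queries = [
    "clouds at sunset",                    # a single positive text query
    "beach; palm trees",                   # several positive queries joined with ";"
    "[Unsplash:123] at night",             # image 123 used as a query, refined by extra text
    "city street EXCLUDING cars; people",  # everything after "EXCLUDING" is a negative query
]

# Each call returns up to n_results (path, tooltip, index) tuples, ranked by the
# normalized CLIP similarity computed in image_search().
results = image_search(example_queries[0], "Unsplash", "large-patch14-336")
```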
data.csv
ADDED
The diff for this file is too large to render.
data2.csv
ADDED
The diff for this file is too large to render.
data3.csv
ADDED
The diff for this file is too large to render.
embeddings-vit-base-patch16.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:125430e11a4a415ec0c0fc5339f97544f0447e4b0a24c20f2e59f8852e706afc
size 51200128
embeddings-vit-base-patch32.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f7ebdff24079665faf58d07045056a63b5499753e3ffbda479691d53de3ab38
size 51200128
embeddings-vit-large-patch14-336.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f79f10ebe267b4ee7acd553dfe0ee31df846123630058a6d58c04bf22e0ad068
size 76800128
embeddings-vit-large-patch14.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64515f7d3d71137e2944f2c3d72c8df3e684b5d6a6ff7dcebb92370f7326ccfd
size 76800128
embeddings2-vit-base-patch16.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:153cf3fae2385d51fe8729d3a1c059f611ca47a3fc501049708114d1bbf79049
size 16732288
embeddings2-vit-base-patch32.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7d545bed86121dac1cedcc1de61ea5295f5840c1eb751637e6628ac54faef81
size 16732288
embeddings2-vit-large-patch14-336.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e66eb377465fbfaa56cec079aa3e214533ceac43646f2ca78028ae4d8ad6d03
size 25098368
embeddings2-vit-large-patch14.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d730b33e758c2648419a96ac86d39516c59795e613c35700d3a64079e5a9a27
size 25098368
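The `.npy` files above are tracked with Git LFS, so the diff only shows the pointer files. The script that generated the embeddings is not part of this commit; below is a minimal sketch of how image embeddings for one of these CLIP checkpoints could be computed and saved. The `image_path` column is an assumption for illustration; `app.py` itself only reads `path` and `tooltip` from the CSVs.

```python
# Hypothetical offline script (not part of this commit) sketching how an
# embeddings-vit-<name>.npy file could be produced.
import numpy as np
import pandas as pd
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

name = "large-patch14-336"
model = CLIPModel.from_pretrained(f"openai/clip-vit-{name}").eval()
processor = CLIPProcessor.from_pretrained(f"openai/clip-vit-{name}")

df = pd.read_csv("data.csv")
features = []
with torch.no_grad():
    for path in df["image_path"]:  # assumed column of local image files
        image = Image.open(path).convert("RGB")
        inputs = processor(images=image, return_tensors="pt")
        features.append(model.get_image_features(**inputs).numpy())

# app.py normalizes the rows at load time, so the raw features are saved as-is.
np.save(f"embeddings-vit-{name}.npy", np.concatenate(features, axis=0))
```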
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch==2.0.1
transformers==4.21.1
ftfy==6.1.1
numpy==1.21.5
pandas==1.3.5
st-clickable-images==0.0.3
altair<5