Spaces:
Sleeping
Sleeping
cheng
committed on
Commit
·
bf67113
1
Parent(s):
5d9e3f9
update multiple object
Browse files
- clip_component.py +15 -11
clip_component.py
CHANGED
@@ -4,9 +4,9 @@ import os
|
|
4 |
from PIL import Image
|
5 |
import clip
|
6 |
|
|
|
7 |
def get_token_from_clip(image):
|
8 |
-
|
9 |
-
text_inputs = ["Bacon", "Bread", "Fruit", "Beans and Rice", "fries", "Lasagna"]
|
10 |
text_tokens = clip.tokenize(text_inputs)
|
11 |
|
12 |
device = "cpu"
|
@@ -27,15 +27,19 @@ def get_token_from_clip(image):
|
|
27 |
with torch.no_grad():
|
28 |
similarity = text_features.cpu().numpy() @ image_feature.cpu().numpy().T
|
29 |
|
30 |
-
|
31 |
-
|
32 |
|
33 |
for i in range(similarity.shape[0]):
|
34 |
similarity_num = (100.0 * similarity[i][0])
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
4 |
from PIL import Image
|
5 |
import clip
|
6 |
|
7 |
+
similarity_threshold = 22.00
|
8 |
def get_token_from_clip(image):
|
9 |
+
text_inputs = ["apple", "banana", "lemon", "orange", "cereal", "salad", "chicken", "juice", "milk", "bread"]
|
|
|
10 |
text_tokens = clip.tokenize(text_inputs)
|
11 |
|
12 |
device = "cpu"
|
|
|
27 |
with torch.no_grad():
|
28 |
similarity = text_features.cpu().numpy() @ image_feature.cpu().numpy().T
|
29 |
|
30 |
+
results = []
|
31 |
+
detect_food = ""
|
32 |
|
33 |
for i in range(similarity.shape[0]):
|
34 |
similarity_num = (100.0 * similarity[i][0])
|
35 |
+
text_input = text_inputs[i]
|
36 |
+
results.append({"text_input": text_input, "similarity": similarity_num})
|
37 |
+
if similarity_num >= similarity_threshold:
|
38 |
+
detect_food += " " + text_input + " ."
|
39 |
+
# print(similarity_num)
|
40 |
+
detect_food_list = detect_food[1:]
|
41 |
+
results.sort(key=lambda x: x["similarity"], reverse=True)
|
42 |
+
# Print the caption for each text input along with their similarity scores
|
43 |
+
for result in results:
|
44 |
+
print(f"Text input: {result['text_input']}, Similarity: {result['similarity']:.2f}")
|
45 |
+
return detect_food_list
|