Commit b4c60c9 · Parent(s): 81b51c7
app.py CHANGED
@@ -9,63 +9,14 @@ from io import BytesIO
 import requests
 import gradio as gr
 import os
-#from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
 import sentence_transformers
 from sentence_transformers import SentenceTransformer, util
 
 # check if CUDA available
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Load the openAI's CLIP model
-#model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-#processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-#tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
-
-# taking photo IDs
-#photo_ids = pd.read_csv("./photo_ids.csv")
-#photo_ids = list(photo_ids['photo_id'])
-
-# Photo dataset
-#photos = pd.read_csv("./photos.tsv000", sep="\t", header=0)
-
-# taking features vectors
-#photo_features = np.load("./features.npy")
 
 IMAGES_DIR = Path("./photos/")
-#def show_output_image(matched_images) :
-    #image=[]
-    #for photo_id in matched_images:
-        # photo_image_url = f"https://unsplash.com/photos/{photo_id}/download?w=280"
-        #response = requests.get(photo_image_url, stream=True)
-        #img = Image.open(BytesIO(response.content))
-        # response = requests.get(photo_image_url, stream=True).raw
-        # img = Image.open(response)
-        #photo = photo_id + '.jpg'
-        #img = Image.open(response).convert("RGB")
-        #img = Image.open(os.path.join(IMAGES_DIR, photo))
-        #image.append(img)
-    #return image
-
 
-# Encode and normalize the search query using CLIP
-#def encode_search_query(search_query, model, device):
-    # with torch.no_grad():
-        # inputs = tokenizer([search_query], padding=True, return_tensors="pt")
-        #inputs = processor(text=[search_query], images=None, return_tensors="pt", padding=True)
-        # text_features = model.get_text_features(**inputs).cpu().numpy()
-        # return text_features
-
-# Find all matched photos
-#def find_matches(features, photo_ids, results_count=4):
-    # Compute the similarity between the search query and each photo using the Cosine similarity
-    #text_features = np.array(text_features)
-    #similarities = (photo_features @ features.T).squeeze(1)
-    # Sort the photos by their similarity score
-    #best_photo_idx = (-similarities).argsort()
-    # Return the photo IDs of the best matches
-    #matches = [photo_ids[i] for i in best_photo_idx[:results_count]]
-    #return matches
-
 #Load CLIP model
 model = SentenceTransformer('clip-ViT-B-32')
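The next hunk compares query embeddings against a tensor named img_emb that is computed in a part of app.py not shown in this diff. Purely for orientation, here is a minimal sketch of how such precomputed photo embeddings could be built from IMAGES_DIR with the same clip-ViT-B-32 model; the file pattern, batching, and any caching used by the actual Space are assumptions, not part of this commit.

# Hypothetical sketch, not part of this commit: build the img_emb tensor
# that util.cos_sim() compares against inside image_search().
from pathlib import Path
from PIL import Image
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('clip-ViT-B-32')   # same model as in the diff
IMAGES_DIR = Path("./photos/")                 # same directory as in the diff

# Encode every photo once; convert_to_tensor=True keeps the result as a
# torch tensor so util.cos_sim can consume it directly.
image_paths = sorted(IMAGES_DIR.glob("*.jpg"))               # assumed .jpg files
images = [Image.open(p).convert("RGB") for p in image_paths]
img_emb = model.encode(images, convert_to_tensor=True, show_progress_bar=True)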
@@ -85,40 +36,21 @@ def display_matches(similarity, topk):
 def image_search(Option, topk, search_text, search_image):
 
     # Input Text Query
-    #search_query = "The feeling when your program finally works"
     if Option == "Text-To-Image" :
-        #
-        #text_features = encode_search_query(search_text, model, device)
+        # Encode the given Input text for Search & take it in tensor form
         text_emb = model.encode([search_text], convert_to_tensor=True)
+        # Compute cosine similarities between encoded input text (in tensor) & encoded images from unsplash dataset
         similarity = util.cos_sim(img_emb, text_emb)
+
+        #using the computed similarities, find the topk best matches
         return display_matches(similarity, topk)
-        # Find the matched Images
-        #matched_images = find_matches(text_features, photo_features, photo_ids, 4)
-        #matched_results = util.semantic_search(text_emb, img_emb, top_k=4)[0]
-
-        # top 4 highest ranked images
-        #return display_matches(matched_results)
     elif Option == "Image-To-Image":
-        # Input Image for Search
-        #search_image = Image.fromarray(search_image.astype('uint8'), 'RGB')
-
-        #with torch.no_grad():
-            # processed_image = processor(text=None, images=search_image, return_tensors="pt", padding=True)["pixel_values"]
-            # image_feature = model.get_image_features(processed_image.to(device))
-            # image_feature /= image_feature.norm(dim=-1, keepdim=True)
-        #image_feature = image_feature.cpu().numpy()
-        # Find the matched Images
-        #matched_images = find_matches(image_feature, photo_ids, 4)
-
-        #image_emb = model.encode(Image.open(search_image), convert_to_tensor=True)
-        #image_emb = model.encode(Image.open(search_image))
-        # Find the matched Images
-        #matched_images = find_matches(text_features, photo_features, photo_ids, 4)
-        #similarity = util.cos_sim(image_emb, img_emb)
-        #matched_results = util.semantic_search(image_emb, img_emb, 4)[0]
-
+        # Encode the given Input Image for Search & take it in tensor form
         image_emb = model.encode([Image.fromarray(search_image)], convert_to_tensor=True)
+        # Compute cosine similarities between encoded input image (in tensor) & encoded images from unsplash dataset
         similarity = util.cos_sim(img_emb, image_emb)
+
+        #using the computed similarities, find the topk best matches
         return display_matches(similarity, topk)
 
 gr.Interface(fn=image_search, title="Search Image",
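The hunk header above shows that app.py already defines display_matches(similarity, topk), but its body lies outside this diff. A hypothetical sketch of what such a helper could look like, assuming the images list from the indexing sketch earlier and a cosine-similarity matrix of shape [num_photos, 1], is:

import torch

def display_matches(similarity, topk):
    # Hypothetical sketch; the real implementation in app.py is not shown in this diff.
    # similarity: [num_photos, 1] tensor returned by util.cos_sim(img_emb, query_emb).
    scores = similarity.squeeze(1)
    best = torch.topk(scores, k=int(topk)).indices.tolist()
    # Return PIL images so a gallery-style Gradio output can render them.
    return [images[i] for i in best]

The removed util.semantic_search(text_emb, img_emb, top_k=4)[0] call would have produced a similar top-k ranking directly; the committed code instead keeps the full cosine-similarity matrix and leaves the top-k selection to display_matches, driven by the user-supplied topk value.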
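The final gr.Interface(...) call is truncated in the diff. Purely as an illustration of how image_search(Option, topk, search_text, search_image) might be wired to Gradio components, one plausible layout (assumed, not taken from this commit) is:

import gradio as gr

# Assumed component choices; the actual inputs/outputs of this Space may differ.
gr.Interface(
    fn=image_search,
    title="Search Image",
    inputs=[
        gr.Radio(["Text-To-Image", "Image-To-Image"], label="Option"),
        gr.Slider(1, 10, value=4, step=1, label="topk"),
        gr.Textbox(label="search_text"),
        gr.Image(label="search_image"),   # delivered to image_search as a numpy array
    ],
    outputs=gr.Gallery(label="Matched images"),
).launch()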