Commit e59258f · Parent(s): 333423b

app.py CHANGED
@@ -8,15 +8,16 @@ from PIL import Image
 from io import BytesIO
 import requests
 import gradio as gr
-
-#model, preprocess = clip.load("ViT-B/32", jit=False)
-#display output photo
+from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
 
 # check if CUDA available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the openAI's CLIP model
-model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
+#model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 
 # taking photo IDs
 photo_ids = pd.read_csv("./photo_ids.csv")
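The three from_pretrained calls above replace the single clip.load("ViT-B/32") call: CLIPModel carries the weights, while CLIPProcessor and CLIPTokenizer take over the image preprocessing and text tokenization that preprocess and clip.tokenize handled before. A minimal sketch of the same setup, with one assumption not in the commit: the diff still computes device but never moves the model onto it, so the .to(device) and .eval() calls here are added for illustration.

import torch
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

# CLIPModel replaces the model returned by clip.load; the processor and
# tokenizer replace the old preprocess function and clip.tokenize.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)  # .to(device) is an assumption
model.eval()  # inference only, so disable dropout
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")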
@@ -44,13 +45,17 @@ def show_output_image(matched_images) :
         #img = Image.open('./photos/'+photo_jpg)
         image.append(img)
     return image
+
 # Encode and normalize the search query using CLIP
 def encode_search_query(search_query, model, device):
     with torch.no_grad():
-        text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
-        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+        inputs = tokenizer([search_query], padding=True, return_tensors="pt")
+        #text_encoded = model.encode_text(clip.tokenize(search_query).to(device))
+        #text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
     # Retrieve the feature vector from the GPU and convert it to a numpy array
-    return text_encoded.cpu().numpy()
+    return model.get_text_features(**inputs).cpu().numpy()
+    #return text_encoded.cpu().numpy()
+
 # Find all matched photos
 def find_matches(text_features, photo_features, photo_ids, results_count=4):
     # Compute the similarity between the search query and each photo using the Cosine similarity
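One behavioral difference worth flagging: the clip-based version (kept above as comments) L2-normalized the text features before returning them, while the rewritten function returns model.get_text_features(**inputs) unnormalized, even though find_matches compares vectors by cosine similarity. A hedged sketch of the function with that normalization restored; passing the tokenizer explicitly and moving the inputs to device are additions for illustration, not part of the commit.

import torch

# Encode and normalize the search query using CLIP (transformers API)
def encode_search_query(search_query, model, tokenizer, device):
    with torch.no_grad():
        inputs = tokenizer([search_query], padding=True, return_tensors="pt").to(device)
        text_encoded = model.get_text_features(**inputs)
        # Unit-normalize, as the clip-based version did
        text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
    # Retrieve the feature vector from the GPU and convert it to a numpy array
    return text_encoded.cpu().numpy()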
@@ -84,8 +89,10 @@ def image_search(search_text, search_image, option):
     elif option == "Image-To-Image":
         # Input Image for Search
         with torch.no_grad():
-
-
+            processed_image = processor(text=None, images=search_image, return_tensors="pt", padding=True)["pixel_values"]
+            image_feature = model.get_image_features(processed_image.to(device))
+            image_feature /= image_feature.norm(dim=-1, keepdim=True)
+            image_feature = image_feature.cpu().numpy()
         # Find the matched Images
         matched_images = find_matches(image_feature, photo_features, photo_ids, 4)
         #is_input_image = True