# Import Libraries
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import pickle
from PIL import Image
from io import BytesIO
import requests
import gradio as gr
import os
#from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import sentence_transformers
from sentence_transformers import SentenceTransformer, util
# Check whether CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load OpenAI's CLIP model (Hugging Face transformers version, kept commented out)
#model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
#processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
#tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
# Photo IDs
#photo_ids = pd.read_csv("./photo_ids.csv")
#photo_ids = list(photo_ids['photo_id'])
# Photo dataset
#photos = pd.read_csv("./photos.tsv000", sep="\t", header=0)
# Pre-computed feature vectors
#photo_features = np.load("./features.npy")
# Local directory that holds the downloaded Unsplash photos
IMAGES_DIR = Path("./photos/")
#def show_output_image(matched_images) :
#image=[]
#for photo_id in matched_images:
# photo_image_url = f"https://unsplash.com/photos/{photo_id}/download?w=280"
#response = requests.get(photo_image_url, stream=True)
#img = Image.open(BytesIO(response.content))
# response = requests.get(photo_image_url, stream=True).raw
# img = Image.open(response)
#photo = photo_id + '.jpg'
#img = Image.open(response).convert("RGB")
#img = Image.open(os.path.join(IMAGES_DIR, photo))
#image.append(img)
#return image
# Encode and normalize the search query using CLIP
#def encode_search_query(search_query, model, device):
# with torch.no_grad():
# inputs = tokenizer([search_query], padding=True, return_tensors="pt")
#inputs = processor(text=[search_query], images=None, return_tensors="pt", padding=True)
# text_features = model.get_text_features(**inputs).cpu().numpy()
# return text_features
# Find all matched photos
#def find_matches(features, photo_ids, results_count=4):
# Compute the similarity between the search query and each photo using the Cosine similarity
#text_features = np.array(text_features)
#similarities = (photo_features @ features.T).squeeze(1)
# Sort the photos by their similarity score
#best_photo_idx = (-similarities).argsort()
# Return the photo IDs of the best matches
#matches = [photo_ids[i] for i in best_photo_idx[:results_count]]
#return matches
# Load the CLIP model (sentence-transformers wrapper; maps text and images into the same embedding space)
model = SentenceTransformer('clip-ViT-B-32')

# Load the pre-computed embeddings for the Unsplash photos: a list of image file names
# and the matching matrix of CLIP image embeddings
emb_filename = 'unsplash-25k-photos-embeddings.pkl'
with open(emb_filename, 'rb') as emb:
    img_names, img_emb = pickle.load(emb)
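
# Illustrative sketch (not part of the original pipeline): if the pickle above were
# missing, equivalent embeddings could be rebuilt from the local ./photos/ folder,
# assuming it holds the Unsplash Lite images referenced by `img_names`:
#
#   img_names = sorted(p.name for p in IMAGES_DIR.glob("*.jpg"))
#   img_emb = model.encode(
#       [Image.open(IMAGES_DIR / name) for name in img_names],
#       batch_size=128,
#       convert_to_tensor=True,
#       show_progress_bar=True,
#   )
#   with open(emb_filename, 'wb') as fOut:
#       pickle.dump((img_names, img_emb), fOut)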
def display_matches(similarity, topk):
    # Take the indices of the top-k highest similarity scores and load the
    # corresponding images from the local photos directory
    best_matched_images = []
    top_k_indices = torch.topk(similarity, topk, 0).indices
    for matched_image in top_k_indices:
        img = Image.open(IMAGES_DIR / img_names[matched_image])
        best_matched_images.append(img)
    return best_matched_images
def image_search(Option, topk, search_text, search_image):
    # The "Select Top K Images" dropdown uses type="index", so its value arrives
    # zero-based ("1" -> 0); shift it so the requested number of images is returned.
    topk = topk + 1
    # Input Text Query
    #search_query = "The feeling when your program finally works"
    if Option == "Text-To-Image":
        # Encode the text query and rank all images by cosine similarity
        #text_features = encode_search_query(search_text, model, device)
        text_emb = model.encode([search_text], convert_to_tensor=True)
        similarity = util.cos_sim(img_emb, text_emb)
        return display_matches(similarity, topk)
        # Find the matched Images
        #matched_images = find_matches(text_features, photo_features, photo_ids, 4)
        #matched_results = util.semantic_search(text_emb, img_emb, top_k=4)[0]
        # top 4 highest ranked images
        #return display_matches(matched_results)
    elif Option == "Image-To-Image":
        # Input Image for Search
        #search_image = Image.fromarray(search_image.astype('uint8'), 'RGB')
        #with torch.no_grad():
        #    processed_image = processor(text=None, images=search_image, return_tensors="pt", padding=True)["pixel_values"]
        #    image_feature = model.get_image_features(processed_image.to(device))
        #    image_feature /= image_feature.norm(dim=-1, keepdim=True)
        #image_feature = image_feature.cpu().numpy()
        # Find the matched Images
        #matched_images = find_matches(image_feature, photo_ids, 4)
        #image_emb = model.encode(Image.open(search_image), convert_to_tensor=True)
        #image_emb = model.encode(Image.open(search_image))
        #similarity = util.cos_sim(image_emb, img_emb)
        #matched_results = util.semantic_search(image_emb, img_emb, 4)[0]
        # Encode the query image (already a PIL image, since the Gradio input uses
        # type="pil") and rank all images by cosine similarity
        image_emb = model.encode([search_image], convert_to_tensor=True)
        similarity = util.cos_sim(img_emb, image_emb)
        return display_matches(similarity, topk)
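
# Illustrative local sanity check (assumption: the embeddings pickle and the ./photos/
# directory are present). The arguments mirror what the Gradio inputs would send, so
# topk is the zero-based dropdown index (2 corresponds to the "3" choice):
#
#   if __name__ == "__main__":
#       results = image_search("Text-To-Image", 2, "dogs playing in the snow", None)
#       for i, im in enumerate(results):
#           im.save(f"match_{i}.jpg")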
#gr.Interface(fn=image_search,
# inputs=[gr.inputs.Textbox(lines=7, label="Input Text"),
# gr.inputs.Image(optional=True),
# gr.inputs.Dropdown(["Text-To-Image", "Image-To-Image"])
# ],
# outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil"), gr.outputs.Image(type="pil"), gr.outputs.Image(type="pil"), gr.outputs.Image(type="pil")]),
# enable_queue=True
# ).launch(debug=True,share=True)
gr.Interface(fn=image_search, title="Search Image",
             description="Enter a text query or an image to find the most relevant images...",
             article="""
             Instructions:
             1. Select an option - `Text-To-Image` OR `Image-To-Image`.
             2. Enter the text or upload the image accordingly.
             3. As soon as the text or image is entered, the matching images appear on the right side.
             Note: after entering text, a different/unexpected image may show up first, but the correct matches appear after a second.
             """,
             theme="huggingface",
             inputs=[gr.inputs.Dropdown(["Text-To-Image", "Image-To-Image"]),
                     gr.inputs.Dropdown(["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], type="index", default="1", label="Select Top K Images"),
                     gr.inputs.Textbox(lines=3, label="Input Text", placeholder="Enter the text..."),
                     gr.inputs.Image(type="pil", optional=True)
                     ],
             outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
             enable_queue=True
             ).launch(debug=True, share=True)