import gradio as gr
import random
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util
import logging
import torch
from PIL import Image
# Create a custom logger
logger = logging.getLogger(__name__)

# Set the level of this logger. INFO means that it will log all INFO, WARNING, ERROR, and CRITICAL messages.
logger.setLevel(logging.INFO)

# Create a console (stream) handler
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)

# Create a formatter and attach it to the handler
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)

# Add the handler to the logger
logger.addHandler(c_handler)

class SearchEngine:
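    """Semantic image search over the Unsplash-25k photos using CLIP text/image embeddings."""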
    def __init__(self, device="cpu"):
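        # Use the requested device only when CUDA is available; otherwise fall back to CPU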
        self.device = device if torch.cuda.is_available() else "cpu"
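        # CLIP model that embeds both text and images into a shared vector space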
        self.model = SentenceTransformer('clip-ViT-B-32')
        # Precomputed CLIP embeddings for the ~25k Unsplash images
        self.embedding_dataset = load_dataset(
            "JLD/unsplash25k-image-embeddings", trust_remote_code=True, split="train"
        ).with_format("torch", device=self.device)
        # Map each photo id to its URL so search hits can be rendered in the gallery
        image_dataset = load_dataset(
            "jamescalam/unsplash-25k-photos", trust_remote_code=True, revision="refs/pr/3"
        )
        self.image_dataset = {
            image["photo_id"]: image["photo_image_url"] for image in image_dataset["train"]
        }

    def get_candidates(self, query_embedding, top_k=5):
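        """Return the URLs of the top_k images whose embeddings are closest to the query embedding."""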
        logger.info("Getting candidates")
        # Cosine-similarity search of the query against every image embedding in the corpus
        candidates = util.semantic_search(
            query_embeddings=query_embedding.unsqueeze(0),
            corpus_embeddings=self.embedding_dataset["image_embedding"].squeeze(1),
            top_k=top_k,
        )[0]
        # Resolve each hit to its photo URL, falling back to a placeholder image if the id is unknown
        fallback_url = "https://upload.wikimedia.org/wikipedia/commons/6/69/NASA-HS201427a-HubbleUltraDeepField2014-20140603.jpg"
        return [
            self.image_dataset.get(
                self.embedding_dataset[candidate["corpus_id"]]["image_id"], fallback_url
            )
            for candidate in candidates
        ]

    def search_images_from_text(self, text):
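        """Encode a text query with CLIP and return the URLs of the best-matching images."""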
        logger.info("Searching images from text")
        emb = self.model.encode(text, convert_to_tensor=True, device=self.device)
        return self.get_candidates(query_embedding=emb)

    def search_images_from_image(self, image):
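        """Encode an uploaded image (a NumPy array from Gradio) with CLIP and return similar image URLs."""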
        logger.info("Searching images from image")
        emb = self.model.encode(Image.fromarray(image), convert_to_tensor=True, device=self.device)
        return self.get_candidates(query_embedding=emb)

def main():
    logger.info("Loading dataset")
    search_engine = SearchEngine()
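    # Expose the same engine through two tabs: free-text search and image-to-image search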
    text_to_image_iface = gr.Interface(fn=search_engine.search_images_from_text, inputs="text", outputs="gallery")
    image_to_image_iface = gr.Interface(fn=search_engine.search_images_from_image, inputs="image", outputs="gallery")
    demo = gr.TabbedInterface([text_to_image_iface, image_to_image_iface], ["Text query", "Image query"])
    demo.launch()
    
if __name__ == "__main__":
    main()
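
# To try this demo locally (assuming the dependencies below are installed):
#   pip install gradio datasets sentence-transformers torch pillow
#   python app.py   # or whatever this file is saved as
# Gradio then serves a "Text query" tab and an "Image query" tab, each returning
# a gallery of the five most similar Unsplash images.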