Bill Psomas committed on
Commit
d224f5c
·
1 Parent(s): 444aa6e

Add initial demo files

Browse files
Files changed (2) hide show
  1. app.py +63 -0
  2. features/patternnet_clip.pkl +3 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+ import open_clip
6
+ import gradio as gr
7
+ import pickle
8
+
9
# Load pre-trained model.
# open_clip.create_model_and_transforms returns
# (model, training image transform, evaluation image transform); it does not
# return a tokenizer, so the tokenizer is fetched separately by model name.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
model.eval()  # inference only
tokenizer = open_clip.get_tokenizer('ViT-L-14')
11
+
12
# Load features
def load_features(pickle_file):
    """Load and return the object stored in a pickle file.

    Args:
        pickle_file: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at
    # feature files that ship with the application.
    with open(pickle_file, 'rb') as f:
        return pickle.load(f)
17
+
18
# Calculate similarity
def calculate_similarity(image_features, text_feature, lambda_val=0.5,
                         query_image_feature=None):
    """Score every database row against a text query and an optional image query.

    Args:
        image_features: 2-D tensor with one database feature per row.
        text_feature: 2-D tensor holding the text query feature as a single row.
        lambda_val: Weight of the text term; the image term gets
            ``1 - lambda_val``. Only used when ``query_image_feature`` is given.
        query_image_feature: Optional 2-D tensor holding the query image
            feature as a single row.

    Returns:
        A tensor with one score per database row (one column per query row).
        Without ``query_image_feature`` the scores are the plain
        text-to-database similarities, since there is nothing to mix.
    """
    # Both terms are measured against the database rows. Comparing the text
    # feature with itself would only produce a constant that cannot change
    # the ranking.
    text_similarities = image_features @ text_feature.T
    if query_image_feature is None:
        return text_similarities

    image_similarities = image_features @ query_image_feature.T
    return (1 - lambda_val) * image_similarities + lambda_val * text_similarities
24
+
25
# Load precomputed features
features = load_features('features/patternnet_clip.pkl')
image_features = torch.tensor(features['feats'])
# Use the GPU when one is present, but still start on CPU-only hosts instead
# of failing at import time.
if torch.cuda.is_available():
    image_features = image_features.cuda()
image_paths = features['paths']
29
+
30
_QUERY_TOOLS = {}


def _query_tools():
    """Return the cached ``(image transform, tokenizer)`` pair used for queries."""
    if not _QUERY_TOOLS:
        # Built lazily and only once so that each request does not rebuild them.
        _QUERY_TOOLS['preprocess'] = open_clip.image_transform(
            model.visual.image_size, is_train=False
        )
        _QUERY_TOOLS['tokenize'] = open_clip.get_tokenizer('ViT-L-14')
    return _QUERY_TOOLS['preprocess'], _QUERY_TOOLS['tokenize']


def image_text_retrieval(image, text, lambda_val):
    """Return the database images that best match an image and/or text query.

    Args:
        image: Query image as a PIL image, or ``None`` when none was given.
        text: Text query; ``None`` or a blank string means no text query.
        lambda_val: Weight of the text term; the image term gets
            ``1 - lambda_val``. Ignored when only one of the two queries is
            present, in which case that query alone drives the ranking.

    Returns:
        A list of at most five opened PIL images, best match first. The list
        is empty when neither an image nor a text query is supplied.
    """
    has_image = image is not None
    has_text = bool(text and text.strip())
    if not (has_image or has_text):
        return []

    if has_image and has_text:
        image_weight, text_weight = 1 - lambda_val, lambda_val
    elif has_image:
        image_weight, text_weight = 1.0, 0.0
    else:
        image_weight, text_weight = 0.0, 1.0

    preprocess, tokenize = _query_tools()
    # Feed the encoders on whatever device the model lives on; the database
    # features may live on a different device.
    model_device = next(model.parameters()).device

    weighted_queries = []
    with torch.no_grad():
        if has_image:
            pixels = preprocess(image).unsqueeze(0).to(model_device)
            weighted_queries.append((image_weight, model.encode_image(pixels)))
        if has_text:
            # The tokenizer already returns a (batch, context) tensor for a list.
            tokens = tokenize([text]).to(model_device)
            weighted_queries.append((text_weight, model.encode_text(tokens)))

        similarities = 0
        for weight, feature in weighted_queries:
            # Unit-length queries keep the two terms on a comparable scale.
            # NOTE(review): assumes the stored database features are also
            # L2-normalised -- confirm against the feature extraction script.
            feature = torch.nn.functional.normalize(feature.float(), dim=-1)
            feature = feature.to(device=image_features.device,
                                 dtype=image_features.dtype)
            similarities = similarities + weight * (image_features @ feature.T).squeeze(1)

    # Rank over the database axis and never ask for more rows than exist.
    k = min(5, similarities.shape[0])
    top_indices = similarities.topk(k).indices.tolist()

    # Retrieve top images
    return [Image.open(image_paths[i]) for i in top_indices]
46
+
47
# Create Gradio interface
def demo(image, text, lambda_val):
    """Gradio callback: pass the UI inputs straight to ``image_text_retrieval``."""
    return image_text_retrieval(image, text, lambda_val)
50
+
51
# Components are taken from the top-level gradio namespace; the legacy
# gr.inputs / gr.outputs namespaces and the Slider `default=` keyword are not
# available in current Gradio releases (`value=` sets the initial position).
iface = gr.Interface(
    fn=demo,
    inputs=[
        gr.Image(type="pil", label="Query Image"),
        gr.Textbox(lines=2, placeholder="Enter text query...", label="Text Query"),
        gr.Slider(minimum=0, maximum=1, value=0.5, label="Lambda Value (Image-Text Weight)"),
    ],
    outputs=[gr.Gallery(label="Retrieved Images")],
    title="Composed Image Retrieval for Remote Sensing",
    description="Upload a query image, enter a text query, and adjust the lambda value to retrieve images based on both image and text inputs.",
)

iface.launch()
features/patternnet_clip.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a21d512ab9fd037ac31f1752948eee34c51a31e57a2bffe7e3d253e861ce3b7f
3
+ size 96401525