File size: 3,378 Bytes
6c29b84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea5c276
 
 
6c29b84
 
 
06758a2
6c29b84
c50d8cf
 
 
 
6c29b84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
import torch as th
import cv2
from PIL import Image
import time
import copy
import numpy as np
import open_clip
import torchvision.transforms as T

def calculate_l2_distances(query, gallery):
    return np.linalg.norm(gallery - query, axis=1)

def get_k_nearest_neighbors(distances, k):
    indices = np.argsort(distances)[:k]
    return indices

def get_similarity_l2(embeddings_gallery, embeddings_query, k):
    print('Processing indices...')

    s = time.time()

    scores = []
    indices = []

    for query in embeddings_query:
        distances = calculate_l2_distances(query, embeddings_gallery)
        nearest_indices = get_k_nearest_neighbors(distances, k)
        scores.append(distances[nearest_indices])
        indices.append(nearest_indices)

    e = time.time()

    print(f'Finished processing indices, took {e - s}s')
    return np.array(scores), np.array(indices)

def get_transform():  
    transform = T.Compose([
            T.Resize(
                size=(224, 224), 
                interpolation=T.InterpolationMode.BICUBIC,
                antialias=True),
            T.ToTensor(), 
            T.Normalize(
                mean=(0.48145466, 0.4578275, 0.40821073), 
                std=(0.26862954, 0.26130258, 0.27577711)
            )
        ])
    return transform

def convert_indices_to_labels(indices, labels):
    indices_copy = copy.deepcopy(indices)
    for row in indices_copy:
        for j in range(len(row)):
            row[j] = labels[row[j]]
    return indices_copy

def read_image(image_file):
    img = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    )
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if img is None:
        raise ValueError('Failed to read {}'.format(image_file))
    return img


def transform_img(image):
    img = image
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    if isinstance(img, np.ndarray):
        img =  Image.fromarray(img)

    transform = get_transform()
        
    img = transform(img)

    return img

@th.no_grad()
def get_feature_vector_img(model, imgs, epoch=10, use_cuda=False):
    features = []
    if use_cuda:
        imgs = imgs.cuda()
    x = (model(imgs)).detach().cpu().numpy().astype(np.float32)  # .half()
    features.append(x)

    return np.concatenate(features, axis=0)

def Model():
    backbone = open_clip.create_model_and_transforms('ViT-H-14', None)[0].visual
    backbone.load_state_dict(th.load('./model1.pt'))  #https://huggingface.co/hca97/aicrowd-visual-recognition-models/blob/main/model1.pt
    # backbone, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:Thenujan/ViT-H-14')
    backbone.eval()   # Dropping unecessary layers
    return backbone


def find(image):
    # img_path = '/kaggle/input/vprtestdata/public_dataset/queries/abiding-inchworm-of-ultimate-freedom.jpeg'

    # image = read_image(img_path)
    image = np.array(image)

    image = transform_img(image)

    image = image.unsqueeze(dim=0)

    feature_vectors_gallery = np.load('./VPR image similarity search/gallary_embedding/feature_vectors_gallery.npy')

    model = Model()

    feature_vectors_query = get_feature_vector_img(model, image, 1)

    _, indices = get_similarity_l2(feature_vectors_gallery, feature_vectors_query, 1000)

    indices = indices.tolist()

    return indices