Commit 9ce9714 · 1 parent: 4f64be2
fix imports, cleanup

app.py CHANGED
@@ -10,15 +10,14 @@ import os
 import glob
 from shutil import rmtree
 from pathlib import Path
-from subprocess import run
-import json
 
 import gradio as gr
 from huggingface_hub import hf_hub_download
+import matplotlib.image as mpimg
 from yolov5 import detect
 import numpy as np
 from tensorflow.keras import backend as K
-from utils import get_model, get_cfg,
+from utils import get_model, get_cfg, get_comp_embeddings, get_test_embedding, get_confidence
 
 
 # YOLOv5 parameters
@@ -31,14 +30,13 @@ working = Path(os.getcwd())
 modelbox = "yellowdolphin/happywhale-models"
 checkpoint_files = [hf_hub_download(modelbox, f'yolov5_l6_{yolo_input_size}_fold{x}.pt') for x in versions]
 image_root = working / 'images'
+yolo_source = str(image_root / 'testimage.jpg')
 
 
 # Individual identifier parameters
 max_distance = 0.865
 normalize_similarity = None  # test-train, None
-
-threshold = 0.09951 if (normalize_similarity == 'test-train') else 0.6  # 0.381
-knn = 300
+threshold = 0.09951 if (normalize_similarity == 'test-train') else 0.6  # 0.381
 rst_names = 'convnext_base_384_in22ft1k_colab220 efnv1b7_colab216 hub_efnv2xl_v73'.split()
 use_fold = {
     'efnv1b7_colab216': 4,
@@ -49,12 +47,12 @@ use_fold = {
     'efnv1b7_colab229': 9,
     'efnv1b6_colab217': 5,
     'efnv1b6_colab218': 6,
-    'hub_efnv2xl_colab221': 8,
+    'hub_efnv2xl_colab221': 8,
     'hub_efnv2xl_v69': 2,
     'hub_efnv2xl_v73': 0,
     'efnv1b6_colab226': 2,
     'hub_efnv2l_v70': 3,
-    'hub_efnv2l_colab200': 2,
+    'hub_efnv2l_colab200': 2,
     'hub_efnv2l_colab199': 1,
     'convnext_base_384_in22ft1k_v68': 0,
     'convnext_base_384_in22ft1k_colab220': 9,
@@ -73,7 +71,6 @@ def fast_yolo_crop(image):
 
     mpimg.imsave(yolo_source, image)
 
-    #print(f"\nInference on best {len(checkpoint_files[5:])} models with detect.py ...")
     detect.run(weights=checkpoint_files[4:],
                source=yolo_source,
                data='data/dataset.yaml',
@@ -87,7 +84,6 @@ def fast_yolo_crop(image):
                exist_ok=True,
                name=str(working / 'results_ensemble'))
 
-    #print(f"YOLOv5 inference finished in {(perf_counter() - t0) / 60:.2f} min")
     cropped = sorted(glob(f'{working}/results_ensemble/crops/*/{Path(yolo_source).name}'))
    assert len(cropped) == 1, f'{len(cropped)} maritime species detected'
     cropped = cropped[0]
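
Note on the crop lookup just above: YOLOv5's detect.run saves each cropped detection under <name>/crops/<class>/<source filename> when crop saving is enabled, so globbing for the source filename recovers both the crop path and the predicted class (the parent folder name). A minimal sketch of that pattern with a hypothetical helper name; the crops/ layout is standard YOLOv5 behavior and is implied, not shown, by this diff:

from glob import glob
from pathlib import Path

def retrieve_single_crop(results_dir, source_path):
    """Return (crop_path, class_name) for the single detection YOLOv5 cropped.

    Assumes detect.run(...) wrote crops to <results_dir>/crops/<class>/<filename>.
    """
    crops = sorted(glob(f'{results_dir}/crops/*/{Path(source_path).name}'))
    assert len(crops) == 1, f'{len(crops)} maritime species detected'
    crop_path = crops[0]
    class_name = Path(crop_path).parent.name  # the crop folder is named after the class
    return crop_path, class_name
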
@@ -102,7 +98,7 @@ comp_embeddings = get_comp_embeddings(emb_files, use_folds)
 # Preload embedding models, input sizes
 K.clear_session()
 embed_models, sizes = [], []
-for cfg_file, rst_file, npz_file in zip (cfg_files, rst_files, emb_files):
+for cfg_file, rst_file, npz_file in zip(cfg_files, rst_files, emb_files):
     cfg = get_cfg(cfg_file)
     assert cfg.FOLD_TO_RUN == use_fold[npz_file]
     cfg.pretrained = None  # avoid weight downloads
@@ -112,7 +108,7 @@ for cfg_file, rst_file, npz_file in zip (cfg_files, rst_files, emb_files):
     model, embed_model = get_model(cfg)
     model.load_weights(rst_file)
     print(f"\nWeights loaded from {rst_file}")
-    print(f"input_size {
+    print(f"input_size {cfg.IMAGE_SIZE}, fold {cfg.FOLD_TO_RUN}, arch {cfg.arch_name}, ",
           f"DATASET {cfg.DATASET}, dropout_ps {cfg.dropout_ps}, subcenters {cfg.subcenters}")
     embed_models.append(embed_model)
 
@@ -122,13 +118,13 @@ def pred_fn(image, fake=False):
         x0, x1 = (int(f * image.shape[0]) for f in (0.2, 0.8))
         y0, y1 = (int(f * image.shape[1]) for f in (0.2, 0.8))
         cropped_image = image[x0:x1, y0:y1, :]
-        response_str =
+        response_str = "This looks like a common dolphin, but I have not seen this individual before (0.834 confidence).\n" \
                        "Go submit your photo on www.happywhale.com!"
         return cropped_image, response_str
 
     cropped_image, species = fast_yolo_crop(image)
-    test_embedding = get_test_embedding(embed_models, sizes)
-
+    test_embedding = get_test_embedding(cropped_image, embed_models, sizes)
+
     cosine_similarity = np.dot(comp_embeddings, test_embedding[0]) / n_models
     cosine_distances = 1 - cosine_similarity
     normalized_distances = cosine_distances / max_distance
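
The arithmetic here depends on each model's embeddings being L2-normalized before concatenation: the dot product of two concatenated vectors is then the sum of one cosine similarity per model, and dividing by n_models gives their mean. A small numpy sketch of that identity (all shapes and names below are illustrative, not taken from the app):

import numpy as np

rng = np.random.default_rng(0)
n_models, emb_size, n_images = 3, 4, 5

def l2_normalize(x, axis=-1):
    return x / np.linalg.norm(x, axis=axis, keepdims=True)

# One L2-normalized embedding per model, for the gallery and for the test image
gallery_parts = [l2_normalize(rng.normal(size=(n_images, emb_size))) for _ in range(n_models)]
test_parts = [l2_normalize(rng.normal(size=(1, emb_size))) for _ in range(n_models)]

comp_embeddings = np.concatenate(gallery_parts, axis=1)  # [n_images, n_models * emb_size]
test_embedding = np.concatenate(test_parts, axis=1)      # [1, n_models * emb_size]

# Dot product of concatenated vectors / n_models == mean per-model cosine similarity
mean_cosine = np.dot(comp_embeddings, test_embedding[0]) / n_models
per_model = np.mean([g @ t[0] for g, t in zip(gallery_parts, test_parts)], axis=0)
assert np.allclose(mean_cosine, per_model)
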
@@ -139,8 +135,8 @@ def pred_fn(image, fake=False):
     confidence = get_confidence(max_similarity, threshold)
 
     print(f"Similarities: {min_similarity:.4f} ... {max_similarity:.4f}")
-    print(f"Threshold:
-
+    print(f"Threshold: {threshold}")
+
     if max_similarity > threshold:
         response_str = f"This looks like a {species} I have seen before ({confidence:.3f} confidence).\n" \
                        "You might find its previous encounters on www.happywhale.com"
@@ -150,6 +146,7 @@ def pred_fn(image, fake=False):
 
     return cropped_image, response_str
 
+
 examples = [str(image_root / f'negative{i:03d}') for i in range(3)]
 
 demo = gr.Interface(fn=pred_fn, inputs="image", outputs=["image", "text"],

utils.py CHANGED
@@ -1,14 +1,18 @@
 import math
+import json
 
 import numpy as np
 import tensorflow as tf
 import tfimm
-import efficientnet
 import efficientnet.tfkeras as efnv1
 import keras_efficientnet_v2 as efnv2
 import tensorflow_hub as hub
 
 
+embedding_size = 1024
+n_images = 51033 + 27956
+
+
 class DotDict(dict):
     """dot.notation access to dictionary attributes
 
@@ -19,7 +23,7 @@ class DotDict(dict):
     __setattr__ = dict.__setitem__
     __delattr__ = dict.__delitem__
 
-
+
 def get_cfg(json_file):
     json_file = str(json_file)
     config_dict = json.load(open(json_file))
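
For context, get_cfg loads a JSON dict into the DotDict shown above so that config keys read as attributes. The diff shows only two of the three method assignments; here is a minimal self-contained sketch of the usual recipe (the __getattr__ line is inferred from the docstring, it is not visible in the diff):

class DotDict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

cfg = DotDict({'arch_name': 'efnv1b7', 'FOLD_TO_RUN': 4})
assert cfg.arch_name == 'efnv1b7'   # same as cfg['arch_name']
cfg.pretrained = None               # same as cfg['pretrained'] = None
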
@@ -32,7 +36,7 @@ def get_embeddings(img, embed_model):
     return embeddings
 
 
-# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
+# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
 # in the training notebook and the valid fold is different for each ensemble model.
 FOLDS = 10
 shards, n_total = [], 0
@@ -42,6 +46,7 @@ for fold in range(10):
     n_total += n_img
 assert n_total == 51033
 
+
 def get_train_idx(use_fold):
     "Return embedding index that restores the order of images in the tfrec files."
     train_folds = [i for i in range(10) if i % FOLDS != use_fold]
@@ -53,14 +58,12 @@ def get_train_idx(use_fold):
     for fold in folds:
         train_idx.append(shards[fold])
     train_idx = np.concatenate(train_idx)
-
+
     return np.argsort(train_idx)
 
 
 def get_comp_embeddings(emb_files, use_folds):
     "Load embeddings for competition images [n_images, embedding_size]"
-    embedding_size = 1024
-    n_images = 51033 + 27956
     comp_embeddings = []
 
     for npz_file, use_fold in zip(emb_files, use_folds):
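
The argsort at the end of get_train_idx works because the embeddings were written out in shard order (train folds first, valid fold last) and np.argsort of a permutation is its inverse. A toy example of the same restore step (toy sizes, not the real 51033-image split):

import numpy as np

original = np.array([10.0, 11.0, 12.0, 13.0, 14.0])

# Suppose the valid fold held indices 2 and 3, so embeddings were
# stored as (train folds first, valid fold last):
write_order = np.array([0, 1, 4, 2, 3])
stored = original[write_order]

# argsort of the write order is the inverse permutation
restore_idx = np.argsort(write_order)
assert np.array_equal(stored[restore_idx], original)
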
@@ -68,7 +71,7 @@ def get_comp_embeddings(emb_files, use_folds):
         d = np.load(str(npz_file))
         comp_train_emb = d['train']
         comp_test_emb = d['test']
-
+
         # Restore original order of comp_train_emb, targets (use targets as fingerprint-check)
         comp_train_idx = get_train_idx(use_fold)
         comp_train_emb = comp_train_emb[comp_train_idx, :]
@@ -85,22 +88,20 @@ def get_comp_embeddings(emb_files, use_folds):
     return np.concatenate(comp_embeddings, axis=1)
 
 
-def get_test_embedding(embed_models, sizes):
-    test_embedding
+def get_test_embedding(image, embed_models, sizes):
+    test_embedding = []
 
     for embed_model, size in zip(embed_models, sizes):
         # Get model input
-
-
-        #print("test image normalized and resized to", scaled_img.shape[:2])
+        scaled_image = tf.image.resize(image, size)
+        scaled_image = tf.cast(scaled_image, tf.float32) / 255.0
 
         # Get embedding for test image
-        test_emb = get_embeddings(
+        test_emb = get_embeddings(scaled_image, embed_model)  # shape: [1, embedding_size]
         assert test_emb.shape == (1, embedding_size)
 
         # Normalize embeddings
         test_emb_norm = np.linalg.norm(test_emb, axis=1)
-        #print("test_emb norm: ", test_emb_norm[0])
         test_emb /= test_emb_norm[:, None]
 
         test_embedding.append(test_emb)
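
The added preprocessing resizes the crop to each model's input size and scales pixel values to [0, 1]; batching to shape [1, H, W, 3] presumably happens inside get_embeddings, which this diff does not show. A minimal sketch of the per-model preprocessing under that assumption:

import numpy as np
import tensorflow as tf

image = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)  # stand-in crop

size = (384, 384)                              # one entry of `sizes`, matching its model
scaled_image = tf.image.resize(image, size)    # float32, values still in [0, 255]
scaled_image = tf.cast(scaled_image, tf.float32) / 255.0
batch = tf.expand_dims(scaled_image, 0)        # [1, 384, 384, 3], assumed done in get_embeddings
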
@@ -108,6 +109,23 @@ def get_test_embedding(embed_models, sizes):
     return np.concatenate(test_embedding, axis=1)  # [1, embedding_size]
 
 
+def p2logit(x):
+    return np.log(x / (1 - x))
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def get_confidence(similarity, threshold):
+    "Calculate confidence in known/unknown prediction"
+    if similarity <= 0:
+        return 0
+    logit_sim = p2logit(similarity)
+    logit_threshold = p2logit(threshold)
+    return sigmoid(abs(logit_sim - logit_threshold))
+
+
 class ArcMarginProductSubCenter(tf.keras.layers.Layer):
     '''
     Implements large margin arc distance.
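
The new confidence measure is symmetric around the decision threshold: both values are mapped to logits, and the absolute gap is squashed back through a sigmoid, so confidence is exactly 0.5 at the threshold and approaches 1 as the similarity moves away from it in either direction. A quick worked check using the functions as committed:

import numpy as np

def p2logit(x):
    return np.log(x / (1 - x))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def get_confidence(similarity, threshold):
    if similarity <= 0:
        return 0
    return sigmoid(abs(p2logit(similarity) - p2logit(threshold)))

print(get_confidence(0.6, 0.6))            # 0.5: no information exactly at the threshold
print(round(get_confidence(0.9, 0.6), 3))  # 0.857: confidently "seen before"
print(round(get_confidence(0.3, 0.6), 3))  # 0.778: confidently "new individual"
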
@@ -204,56 +222,39 @@ TFHUB = {
 
 def get_model(cfg):
     aux_arcface = False  # Chris Deotte suggested this
-    if cfg.head == '
-        head = ArcMarginPenaltyLogists
-    elif cfg.head == 'arcface':
+    if cfg.head == 'arcface':
         head = ArcMarginProductSubCenter
-    elif cfg.head == 'addface':
-        head = AddMarginProductSubCenter
     else:
         assert False, "INVALID HEAD"
 
     if cfg.adaptive_margin:
-
-
-        fewness = df['individual_id'].value_counts().sort_index() ** (-1/4)
-        fewness -= fewness.min()
-        fewness /= fewness.max() - fewness.min()
-        adaptive_margin = cfg.margin_min + fewness * (cfg.margin_max - cfg.margin_min)
-
-        # align margins with targets
-        splits_path = '/kaggle/input/happywhale-splits'
-        with open (f'{splits_path}/individual_ids.json', "r") as f:
-            target_encodings = json.loads(f.read())  # individual_id: index
-        individual_ids = pd.Series(target_encodings).sort_values().index.values
-        adaptive_margin = adaptive_margin.loc[individual_ids].values.astype(np.float32)
-
+        raise NotImplementedError
+
     if cfg.arch_name.startswith('efnv1'):
-        EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
+        EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
                'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
-               'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
+               'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
                'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
 
     if cfg.arch_name.startswith('efnv2'):
         EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
                'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}
 
-
-    with strategy.scope():
+    with tf.distribute.get_strategy().scope():
 
         margin = head(
-            n_classes
-            s
-            m
-            k
-            easy_margin
-            name=f'head/{cfg.head}',
+            n_classes=cfg.N_CLASSES,
+            s=30,
+            m=0.3,
+            k=cfg.subcenters or 1,
+            easy_margin=False,
+            name=f'head/{cfg.head}',
             dtype='float32')
 
-        inp = tf.keras.layers.Input(shape
-        label = tf.keras.layers.Input(shape
+        inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1')
+        label = tf.keras.layers.Input(shape=(), name='inp2')
         if aux_arcface:
-            label2 = tf.keras.layers.Input(shape
+            label2 = tf.keras.layers.Input(shape=(), name='inp3')
 
         if cfg.arch_name.startswith('efnv1'):
             x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
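
For readers without the full file: ArcMarginProductSubCenter is a sub-center ArcFace head, where each class owns k weight sub-centers and the class logit is the maximum cosine over its sub-centers (the margin m and scale s only matter during training). A generic sketch of the sub-center cosine computation, reconstructed from the literature rather than copied from this repo:

import numpy as np

def subcenter_cosine_logits(embeddings, weights, n_classes, k):
    """embeddings: [batch, emb_size], weights: [emb_size, n_classes * k]"""
    x = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    w = weights / np.linalg.norm(weights, axis=0, keepdims=True)
    cos = x @ w                                   # [batch, n_classes * k]
    cos = cos.reshape(len(x), n_classes, k)
    return cos.max(axis=2)                        # best sub-center per class

logits = subcenter_cosine_logits(np.random.randn(2, 8), np.random.randn(8, 5 * 3),
                                 n_classes=5, k=3)
assert logits.shape == (2, 5)
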
@@ -263,8 +264,6 @@ def get_model(cfg):
                 embed = tf.keras.layers.Flatten()(x)
                 embed = tf.keras.layers.Dropout(0.1)(embed)
                 embed = tf.keras.layers.Dense(1024)(embed)
-            elif cfg.pool == 'gem':
-                embed = GeMPoolingLayer(train_p=True)(x)
             elif cfg.pool == 'concat':
                 embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                      tf.keras.layers.GlobalAveragePooling2D()(x)])
@@ -272,7 +271,7 @@ def get_model(cfg):
                 embed = tf.keras.layers.GlobalMaxPooling2D()(x)
             else:
                 embed = tf.keras.layers.GlobalAveragePooling2D()(x)
-
+
         elif cfg.arch_name.startswith('efnv2'):
             x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
                                    pretrained=cfg.pretrained)(inp)
@@ -282,8 +281,6 @@ def get_model(cfg):
                 embed = tf.keras.layers.Flatten()(x)
                 embed = tf.keras.layers.Dropout(0.1)(embed)
                 embed = tf.keras.layers.Dense(1024)(embed)
-            elif cfg.pool == 'gem':
-                embed = GeMPoolingLayer(train_p=True)(x)
             elif cfg.pool == 'concat':
                 embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                      tf.keras.layers.GlobalAveragePooling2D()(x)])
@@ -297,15 +294,11 @@ def get_model(cfg):
             url = TFHUB[cfg.arch_name]
             model = hub.KerasLayer(url, trainable=True)
             embed = model(inp)
-            #print(f"{cfg.arch_name} from tfhub")
             assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
-
+
         elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
-            #print(f"{cfg.arch_name} from tfimm")
-            #embed = tfimm.create_model(cfg.arch_name, pretrained="timm", nb_classes=0)(inp)
             embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
-
-
+
         if len(cfg.dropout_ps) > 0:
             # Chris Deotte posted model code without Dropout/FC1 after pooling
             embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
@@ -314,32 +307,17 @@ def get_model(cfg):
         x = margin([embed, label])
 
         output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)
-
-        if cfg.aux_loss and aux_arcface:
-            # Use 2nd arcface head for species (aux loss)
-            head2 = ArcMarginProductSubCenter
-            margin2 = head(
-                n_classes = cfg.n_species,
-                s = 30,
-                m = 0.3,
-                k = 1,
-                easy_margin = False,
-                name=f'auxhead/{cfg.head}',
-                dtype='float32')
-            aux_features = margin2([embed, label2])
-            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
 
-
+        if cfg.aux_loss:
             aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
             aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
         inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
         outputs = (output, aux_output) if cfg.aux_loss else [output]
-
+
         model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
         embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)
-
-        opt = tf.keras.optimizers.Adam(learning_rate=cfg.LR)
+
     if cfg.FREEZE_BATCH_NORM:
-
-
+        raise NotImplementedError
+
     return model, embed_model
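
Finally, the reason get_model returns two models is that they share layers: model carries the margin head for checkpoint compatibility with model.load_weights, while embed_model exposes the embedding output that app.py actually uses. A stripped-down sketch of that shared-backbone pattern (toy layer sizes, no margin head):

import tensorflow as tf

inp = tf.keras.layers.Input(shape=(32, 32, 3), name='inp1')
x = tf.keras.layers.GlobalAveragePooling2D()(inp)
embed = tf.keras.layers.Dense(16, name='embedding')(x)
output = tf.keras.layers.Softmax()(tf.keras.layers.Dense(10)(embed))

model = tf.keras.models.Model(inputs=inp, outputs=output)       # load_weights() targets this
embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)  # shares the same layer objects

# Weights loaded into `model` are immediately visible to `embed_model`.
assert model.get_layer('embedding') is embed_model.get_layer('embedding')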