Spaces:
Sleeping
Sleeping
import math | |
import json | |
import numpy as np | |
import tensorflow as tf | |
import tfimm | |
import efficientnet.tfkeras as efnv1 | |
import keras_efficientnet_v2 as efnv2 | |
import tensorflow_hub as hub | |
# Length of the embedding vector produced by a single ensemble model.
embedding_size = 1024

# Number of competition images: 51033 train images followed by 27956 test images.
n_images = 51033 + 27956
class DotDict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    Reading a missing key through attribute access yields ``None`` instead of
    raising, so do not rely on ``getattr(obj, name, default)`` returning the
    default.

    Reference:
    https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
    """

    def __getattr__(self, name):
        # Missing keys map to None rather than raising AttributeError.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment stores a dictionary item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Like the underlying dict, deleting an unknown key raises KeyError.
        dict.__delitem__(self, name)
def get_cfg(json_file):
    """Load a JSON config file and return its content as a DotDict.

    Args:
        json_file: path of the JSON file (converted with ``str()`` before opening).

    Returns:
        DotDict built from the parsed JSON object.
    """
    # Use a context manager so the file handle is closed right after parsing.
    with open(str(json_file)) as fp:
        config_dict = json.load(fp)
    return DotDict(config_dict)
def get_embeddings(img, embed_model):
    """Run `embed_model` on a single image and return its prediction.

    Args:
        img: one image without batch axis; must support ``img[None, ...]``.
        embed_model: object with a Keras-style ``predict`` method.

    Returns:
        The result of ``embed_model.predict`` for the one-image batch.
    """
    inp = img[None, ...]  # prepend the batch axis
    # `workers` / `use_multiprocessing` only apply to generator or Sequence
    # inputs, are ignored for an in-memory batch and are no longer accepted by
    # newer Keras versions, so they are not passed.
    return embed_model.predict(inp, verbose=1, batch_size=1)
# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook and the valid fold is different for each ensemble model.
FOLDS = 10

# Consecutive index ranges of the 10 train shards: the first three hold 5104
# images each, the remaining seven 5103 each.
shards = []
n_total = 0
for fold in range(10):
    n_img = 5103 if fold > 2 else 5104
    shards.append(list(range(n_total, n_total + n_img)))
    n_total += n_img
assert n_total == 51033
def get_train_idx(use_fold):
    """Return the index that restores the tfrec-file order of the train embeddings.

    The saved embeddings list all shards not belonging to `use_fold` first,
    followed by the shards of `use_fold`. Indexing the saved array with the
    returned permutation puts the rows back into shard order 0..9.
    """
    shard_ids = range(10)
    train_part = [i for i in shard_ids if i % FOLDS != use_fold]
    valid_part = [i for i in shard_ids if i % FOLDS == use_fold]

    # Original image positions in the order in which the embeddings were saved
    saved_order = np.concatenate([shards[i] for i in train_part + valid_part])
    return np.argsort(saved_order)
def get_comp_embeddings(emb_files, use_folds):
    """Load the L2-normalized embeddings of all competition images.

    Args:
        emb_files: paths of ``.npz`` files, each holding a 'train' and a
            'test' array.
        use_folds: for each file, the fold handed to ``get_train_idx`` to undo
            the (train, valid) ordering of the saved train embeddings.

    Returns:
        Array with ``n_images`` rows: the per-file embeddings (train rows
        followed by test rows, each row scaled to unit norm) concatenated
        along axis 1, i.e. ``embedding_size`` columns per file.

    Raises:
        AssertionError: if a file does not yield an array of shape
            (n_images, embedding_size).
    """
    comp_embeddings = []
    for npz_file, use_fold in zip(emb_files, use_folds):
        # Get embeddings for all competition images. The archive keeps its
        # file handle open until closed, hence the context manager.
        with np.load(str(npz_file)) as d:
            comp_train_emb = d['train']
            comp_test_emb = d['test']

        # Restore original order of comp_train_emb
        comp_train_idx = get_train_idx(use_fold)
        comp_train_emb = comp_train_emb[comp_train_idx, :]
        comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
        assert comp_embs.shape == (n_images, embedding_size)

        # Normalize embeddings
        comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
        print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
        comp_embs /= comp_embs_norms[:, None]
        comp_embeddings.append(comp_embs)

    return np.concatenate(comp_embeddings, axis=1)
def get_test_embedding(image, embed_models, sizes):
    """Embed one image with every model and join the unit-norm embeddings.

    For each (model, size) pair the image is resized to `size`, cast to
    float32 and divided by 255 before it is passed to ``get_embeddings``.

    Returns:
        Array of shape [1, embedding_size * number of models].
    """
    normalized = []
    for embed_model, size in zip(embed_models, sizes):
        # Model input: resized image with pixel values scaled by 1/255
        model_input = tf.cast(tf.image.resize(image, size), tf.float32) / 255.0

        # Embedding of the test image, expected shape: [1, embedding_size]
        emb = get_embeddings(model_input, embed_model)
        assert emb.shape == (1, embedding_size)

        # Scale to unit L2 norm (in place)
        emb /= np.linalg.norm(emb, axis=1)[:, None]
        normalized.append(emb)

    return np.concatenate(normalized, axis=1)
def p2logit(x):
    """Return the logit log(x / (1 - x)) of `x`."""
    odds = x / (1 - x)
    return np.log(odds)
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of `x`."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def get_confidence(similarity, threshold):
    """Calculate confidence in known/unknown prediction.

    The confidence is the sigmoid of the absolute distance between
    `similarity` and `threshold` in logit space, so it is 0.5 when both are
    equal and approaches 1 the further they are apart.

    Args:
        similarity: scalar similarity score.
        threshold: scalar decision threshold; its logit must be finite,
            i.e. 0 < threshold < 1.

    Returns:
        0 if `similarity` <= 0, 1.0 if `similarity` >= 1, otherwise a value
        in [0.5, 1).
    """
    if similarity <= 0:
        return 0
    if similarity >= 1:
        # The logit is +inf at 1 and undefined above it (log of a negative
        # number gives NaN); return the limit value instead.
        return 1.0
    logit_sim = p2logit(similarity)
    logit_threshold = p2logit(threshold)
    return sigmoid(abs(logit_sim - logit_threshold))
class ArcMarginProductSubCenter(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    References:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
        https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/

    Sub-center version:
        for k > 1, the embedding layer can learn k sub-centers per class

    The layer is called on a pair ``[X, y]`` of features and class labels and
    returns the cosine between the normalized features and the normalized
    class weights, with the angular margin applied at the label position,
    multiplied by `s`.

    Args:
        n_classes: number of classes (columns of the output).
        s: factor the output is multiplied with.
        m: angular margin; at the label position cos(theta) is replaced by
            cos(theta + m).
        k: number of sub-centers per class; the maximum cosine over the k
            sub-centers of a class is used.
        easy_margin: if True, the margin is only applied where cosine > 0.
        ls_eps: label-smoothing strength applied to the one-hot labels.
    '''

    def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        super().__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.k = k
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Below this cosine the fallback `cosine - mm` is used instead of phi
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        """Return the layer config including all constructor arguments."""
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'k': self.k,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        """Create the weight matrix; `input_shape[0]` is the shape of X."""
        super().build(input_shape[0])
        # One column per (class, sub-center) pair
        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes * self.k),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True)

    def call(self, inputs):
        """Return the scaled margin logits for ``inputs = [X, y]``.

        Assumes X is [batch, features] and y is [batch] -- TODO confirm
        against the caller.
        """
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine_all = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        if self.k > 1:
            # Keep the best matching sub-center of every class
            cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
            cosine = tf.math.reduce_max(cosine_all, axis=2)
        else:
            cosine = cosine_all
        # Rounding can push |cosine| slightly above 1, which would make the
        # radicand negative and the square root NaN; clip it to [0, 1].
        sine = tf.math.sqrt(
            tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin value at the label position, plain cosine everywhere else
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
# TF-Hub model URLs by architecture name: EfficientNetV2 feature vectors
# (imagenet21k, fine-tuned on 1k) followed by BiT-M ResNets.
TFHUB = {
    **{
        f'hub_efnv2{variant}':
            f"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_{variant}/feature_vector/2"
        for variant in ('s', 'm', 'l', 'xl')
    },
    **{
        f'bit_m-{variant}': f"https://tfhub.dev/google/bit/m-{variant}/1"
        for variant in ('r50x1', 'r50x3', 'r101x1', 'r101x3', 'r152x4')
    },
}
# Build the ArcFace classification model and the matching embedding model.
#
# Reads from cfg: head, adaptive_margin, arch_name, N_CLASSES, subcenters,
# IMAGE_SIZE, pretrained, pool, dropout_ps, aux_loss, n_species, FREEZE_BATCH_NORM.
# Returns (model, embed_model): `model` maps [image, label] to the softmax over
# the margin-head output (plus the 'aux' softmax when cfg.aux_loss is set);
# `embed_model` maps the image to the tensor `embed` that feeds the margin head.
# Raises NotImplementedError if cfg.adaptive_margin or cfg.FREEZE_BATCH_NORM is set.
#
# NOTE(review): indentation is lost in this copy of the file. In particular it
# cannot be told here whether the Dense(1024) / BatchNormalization lines belong
# to the body of `if len(cfg.dropout_ps) > 0`, or where the strategy scope ends.
# Confirm against the original file before restructuring this function.
def get_model(cfg): | |
aux_arcface = False # Chris Deotte suggested this | |
if cfg.head == 'arcface': | |
head = ArcMarginProductSubCenter | |
else: | |
# NOTE(review): asserts are skipped under `python -O`; `head` would then be
# unbound when the margin layer is created below.
assert False, "INVALID HEAD" | |
if cfg.adaptive_margin: | |
raise NotImplementedError | |
# Lookup tables from cfg.arch_name to the backbone constructor
if cfg.arch_name.startswith('efnv1'): | |
EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1, | |
'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3, | |
'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5, | |
'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7} | |
if cfg.arch_name.startswith('efnv2'): | |
EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M, | |
'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL} | |
with tf.distribute.get_strategy().scope(): | |
# Margin head with fixed s=30, m=0.3; k falls back to 1 if cfg.subcenters is unset
margin = head( | |
n_classes=cfg.N_CLASSES, | |
s=30, | |
m=0.3, | |
k=cfg.subcenters or 1, | |
easy_margin=False, | |
name=f'head/{cfg.head}', | |
dtype='float32') | |
inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1') | |
label = tf.keras.layers.Input(shape=(), name='inp2') | |
# aux_arcface is hard-coded to False above, so label2 is never created
if aux_arcface: | |
label2 = tf.keras.layers.Input(shape=(), name='inp3') | |
# NOTE(review): an arch_name matching none of the four branches below leaves
# `embed` unassigned.
if cfg.arch_name.startswith('efnv1'): | |
x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp) | |
if cfg.pool == 'flatten': | |
embed = tf.keras.layers.Flatten()(x) | |
elif cfg.pool == 'fc': | |
embed = tf.keras.layers.Flatten()(x) | |
embed = tf.keras.layers.Dropout(0.1)(embed) | |
embed = tf.keras.layers.Dense(1024)(embed) | |
elif cfg.pool == 'concat': | |
# NOTE(review): both concatenated tensors are GlobalAveragePooling2D; avg + max
# may have been intended -- confirm before changing, trained weights may rely on it.
embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x), | |
tf.keras.layers.GlobalAveragePooling2D()(x)]) | |
elif cfg.pool == 'max': | |
embed = tf.keras.layers.GlobalMaxPooling2D()(x) | |
else: | |
embed = tf.keras.layers.GlobalAveragePooling2D()(x) | |
elif cfg.arch_name.startswith('efnv2'): | |
x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0, | |
pretrained=cfg.pretrained)(inp) | |
# Same pooling options as in the efnv1 branch
if cfg.pool == 'flatten': | |
embed = tf.keras.layers.Flatten()(x) | |
elif cfg.pool == 'fc': | |
embed = tf.keras.layers.Flatten()(x) | |
embed = tf.keras.layers.Dropout(0.1)(embed) | |
embed = tf.keras.layers.Dense(1024)(embed) | |
elif cfg.pool == 'concat': | |
embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x), | |
tf.keras.layers.GlobalAveragePooling2D()(x)]) | |
elif cfg.pool == 'max': | |
embed = tf.keras.layers.GlobalMaxPooling2D()(x) | |
else: | |
embed = tf.keras.layers.GlobalAveragePooling2D()(x) | |
elif cfg.arch_name in TFHUB: | |
# tfhub models cannot be modified => Pooling cannot be changed! | |
url = TFHUB[cfg.arch_name] | |
model = hub.KerasLayer(url, trainable=True) | |
embed = model(inp) | |
assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!' | |
elif cfg.arch_name in tfimm.list_models(pretrained="timm"): | |
embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp) | |
# NOTE(review): len() raises TypeError if cfg.dropout_ps is None -- presumably the
# config always provides a list; verify.
if len(cfg.dropout_ps) > 0: | |
# Chris Deotte posted model code without Dropout/FC1 after pooling | |
embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed) | |
embed = tf.keras.layers.Dense(1024)(embed) # tunable embedding size | |
embed = tf.keras.layers.BatchNormalization()(embed) # missing in public notebooks | |
x = margin([embed, label]) | |
output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x) | |
# Optional auxiliary classifier with cfg.n_species outputs on top of the embedding
if cfg.aux_loss: | |
aux_features = tf.keras.layers.Dense(cfg.n_species)(embed) | |
aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features) | |
inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label] | |
outputs = (output, aux_output) if cfg.aux_loss else [output] | |
model = tf.keras.models.Model(inputs=inputs, outputs=outputs) | |
# The embedding model shares its layers with `model` but takes only the image input
embed_model = tf.keras.models.Model(inputs=inp, outputs=embed) | |
if cfg.FREEZE_BATCH_NORM: | |
raise NotImplementedError | |
return model, embed_model | |