# happywhale-demo / utils.py
import math
import json
import numpy as np
import tensorflow as tf
import tfimm
import efficientnet.tfkeras as efnv1
import keras_efficientnet_v2 as efnv2
import tensorflow_hub as hub
embedding_size = 1024  # per-model embedding dimension
n_images = 51033 + 27956  # competition train + test images
class DotDict(dict):
"""dot.notation access to dictionary attributes
Reference:
https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
"""
__getattr__ = dict.get # returns None if missing key, don't use getattr() with default!
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
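# Example of DotDict behavior (illustrative): DotDict({'a': 1}).a == 1,
# while a missing key, e.g. DotDict({}).b, silently returns None.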
def get_cfg(json_file):
    "Load a JSON config file and return it as a DotDict."
    with open(str(json_file)) as f:
        config_dict = json.load(f)
    return DotDict(config_dict)
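# Illustrative usage, assuming a hypothetical config file name:
#   cfg = get_cfg('config.json')
#   cfg.arch_name  # dot access via DotDict; None if the key is absent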
def get_embeddings(img, embed_model):
    "Return the embedding of a single image, shape [1, embedding_size]."
    inp = img[None, ...]  # add batch dimension
    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4, use_multiprocessing=True)
    return embeddings
# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook and the valid fold is different for each ensemble model.
FOLDS = 10
shards, n_total = [], 0
for fold in range(FOLDS):
    n_img = 5104 if fold <= 2 else 5103  # 3 * 5104 + 7 * 5103 == 51033
shards.append(list(range(n_total, n_total + n_img)))
n_total += n_img
assert n_total == 51033
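# Resulting layout (follows from the arithmetic above): shards[0] == [0, ..., 5103],
# shards[1] == [5104, ..., 10207], ..., shards[9] == [45930, ..., 51032].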
def get_train_idx(use_fold):
"Return embedding index that restores the order of images in the tfrec files."
    train_folds = [i for i in range(FOLDS) if i != use_fold]
    valid_folds = [i for i in range(FOLDS) if i == use_fold]
folds = train_folds + valid_folds
# order of saved embeddings (train + valid)
train_idx = []
for fold in folds:
train_idx.append(shards[fold])
train_idx = np.concatenate(train_idx)
return np.argsort(train_idx)
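# Example: for use_fold=0 the embeddings were saved as shards 1..9 followed by shard 0,
# so get_train_idx(0)[k] is the saved row holding tfrec image k, and
# emb[get_train_idx(0)] restores tfrec order.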
def get_comp_embeddings(emb_files, use_folds):
"Load embeddings for competition images [n_images, embedding_size]"
comp_embeddings = []
for npz_file, use_fold in zip(emb_files, use_folds):
# Get embeddings for all competition images
d = np.load(str(npz_file))
comp_train_emb = d['train']
comp_test_emb = d['test']
        # Restore the original (tfrec) order of comp_train_emb
comp_train_idx = get_train_idx(use_fold)
comp_train_emb = comp_train_emb[comp_train_idx, :]
comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
assert comp_embs.shape == (n_images, embedding_size)
# Normalize embeddings
comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
comp_embs /= comp_embs_norms[:, None]
comp_embeddings.append(comp_embs)
return np.concatenate(comp_embeddings, axis=1)
def get_test_embedding(image, embed_models, sizes):
test_embedding = []
for embed_model, size in zip(embed_models, sizes):
# Get model input
scaled_image = tf.image.resize(image, size)
scaled_image = tf.cast(scaled_image, tf.float32) / 255.0
# Get embedding for test image
test_emb = get_embeddings(scaled_image, embed_model) # shape: [1, embedding_size]
assert test_emb.shape == (1, embedding_size)
# Normalize embeddings
test_emb_norm = np.linalg.norm(test_emb, axis=1)
test_emb /= test_emb_norm[:, None]
test_embedding.append(test_emb)
    return np.concatenate(test_embedding, axis=1)  # [1, n_models * embedding_size]
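# Example: with a 3-model ensemble, the returned embedding has shape [1, 3 * 1024],
# column-aligned with the output of get_comp_embeddings for the same models.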
def p2logit(x):
    "Map a probability in (0, 1) to its logit (inverse of the sigmoid)."
    return np.log(x / (1 - x))
def sigmoid(x):
    "Logistic function, the inverse of p2logit."
    return 1 / (1 + np.exp(-x))
def get_confidence(similarity, threshold):
"Calculate confidence in known/unknown prediction"
if similarity <= 0:
return 0
logit_sim = p2logit(similarity)
logit_threshold = p2logit(threshold)
return sigmoid(abs(logit_sim - logit_threshold))
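# Worked example: get_confidence(0.9, 0.5) -> p2logit(0.9) ≈ 2.197, p2logit(0.5) = 0,
# sigmoid(|2.197 - 0|) ≈ 0.9; a similarity exactly at the threshold yields the
# minimum confidence sigmoid(0) = 0.5.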
class ArcMarginProductSubCenter(tf.keras.layers.Layer):
'''
Implements large margin arc distance.
References:
https://arxiv.org/pdf/1801.07698.pdf
https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/
Sub-center version:
for k > 1, the embedding layer can learn k sub-centers per class
'''
def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False,
ls_eps=0.0, **kwargs):
        super().__init__(**kwargs)
self.n_classes = n_classes
self.s = s
self.m = m
self.k = k
self.ls_eps = ls_eps
self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)      # below this cosine, the margin cannot be applied
        self.mm = tf.math.sin(math.pi - m) * m  # linear penalty used instead
def get_config(self):
config = super().get_config().copy()
config.update({
'n_classes': self.n_classes,
's': self.s,
'm': self.m,
'k': self.k,
'ls_eps': self.ls_eps,
'easy_margin': self.easy_margin,
})
return config
def build(self, input_shape):
        super().build(input_shape[0])
self.W = self.add_weight(
name='W',
shape=(int(input_shape[0][-1]), self.n_classes * self.k),
initializer='glorot_uniform',
dtype='float32',
trainable=True)
def call(self, inputs):
        X, y = inputs  # X: embeddings [batch, features], y: integer class labels [batch]
y = tf.cast(y, dtype=tf.int32)
cosine_all = tf.matmul(
tf.math.l2_normalize(X, axis=1),
tf.math.l2_normalize(self.W, axis=0)
)
if self.k > 1:
cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
cosine = tf.math.reduce_max(cosine_all, axis=2)
else:
cosine = cosine_all
        sine = tf.math.sqrt(tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0))  # clip guards against NaN from rounding
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = tf.where(cosine > 0, phi, cosine)
else:
phi = tf.where(cosine > self.th, phi, cosine - self.mm)
one_hot = tf.cast(
tf.one_hot(y, depth=self.n_classes),
dtype=cosine.dtype
)
if self.ls_eps > 0:
one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
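# Minimal usage sketch (hypothetical shapes and class count, not from this repo):
#   margin = ArcMarginProductSubCenter(n_classes=100, s=30, m=0.3, k=3)
#   logits = margin([embeddings, labels])  # embeddings: [batch, dim] float, labels: [batch] int
#   probs = tf.nn.softmax(logits)          # get_model below applies Softmax the same way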
TFHUB = {
'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
}
def get_model(cfg):
    aux_arcface = False  # optional extra label input (suggested by Chris Deotte); disabled here
if cfg.head == 'arcface':
head = ArcMarginProductSubCenter
    else:
        raise ValueError(f"invalid head: {cfg.head}")
if cfg.adaptive_margin:
raise NotImplementedError
if cfg.arch_name.startswith('efnv1'):
EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
if cfg.arch_name.startswith('efnv2'):
EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}
with tf.distribute.get_strategy().scope():
margin = head(
n_classes=cfg.N_CLASSES,
s=30,
m=0.3,
k=cfg.subcenters or 1,
easy_margin=False,
name=f'head/{cfg.head}',
dtype='float32')
inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1')
label = tf.keras.layers.Input(shape=(), name='inp2')
if aux_arcface:
label2 = tf.keras.layers.Input(shape=(), name='inp3')
if cfg.arch_name.startswith('efnv1'):
x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
if cfg.pool == 'flatten':
embed = tf.keras.layers.Flatten()(x)
elif cfg.pool == 'fc':
embed = tf.keras.layers.Flatten()(x)
embed = tf.keras.layers.Dropout(0.1)(embed)
embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'concat':
                # concat-pool: average- and max-pooled features, concatenated
                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                     tf.keras.layers.GlobalMaxPooling2D()(x)])
elif cfg.pool == 'max':
embed = tf.keras.layers.GlobalMaxPooling2D()(x)
else:
embed = tf.keras.layers.GlobalAveragePooling2D()(x)
elif cfg.arch_name.startswith('efnv2'):
x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
pretrained=cfg.pretrained)(inp)
if cfg.pool == 'flatten':
embed = tf.keras.layers.Flatten()(x)
elif cfg.pool == 'fc':
embed = tf.keras.layers.Flatten()(x)
embed = tf.keras.layers.Dropout(0.1)(embed)
embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'concat':
                # concat-pool: average- and max-pooled features, concatenated
                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                     tf.keras.layers.GlobalMaxPooling2D()(x)])
elif cfg.pool == 'max':
embed = tf.keras.layers.GlobalMaxPooling2D()(x)
else:
embed = tf.keras.layers.GlobalAveragePooling2D()(x)
elif cfg.arch_name in TFHUB:
# tfhub models cannot be modified => Pooling cannot be changed!
url = TFHUB[cfg.arch_name]
model = hub.KerasLayer(url, trainable=True)
embed = model(inp)
assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
if len(cfg.dropout_ps) > 0:
# Chris Deotte posted model code without Dropout/FC1 after pooling
embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
embed = tf.keras.layers.Dense(1024)(embed) # tunable embedding size
embed = tf.keras.layers.BatchNormalization()(embed) # missing in public notebooks
x = margin([embed, label])
output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)
if cfg.aux_loss:
aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
outputs = (output, aux_output) if cfg.aux_loss else [output]
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)
if cfg.FREEZE_BATCH_NORM:
raise NotImplementedError
return model, embed_model
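# End-to-end sketch (hypothetical config path; cfg must provide head, arch_name, pool,
# pretrained, N_CLASSES, IMAGE_SIZE, subcenters, dropout_ps, aux_loss, adaptive_margin,
# FREEZE_BATCH_NORM, and n_species if aux_loss is set, as read above):
#   cfg = get_cfg('config.json')
#   model, embed_model = get_model(cfg)
#   emb = get_test_embedding(image, [embed_model], [cfg.IMAGE_SIZE])  # [1, 1024]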