# happywhale-demo / utils.py
import json
import math
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tfimm
import efficientnet.tfkeras as efnv1
import keras_efficientnet_v2 as efnv2


class DotDict(dict):
"""dot.notation access to dictionary attributes
Reference:
https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
"""
    __getattr__ = dict.get  # returns None for missing keys; getattr() with a default won't work!
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
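
# Minimal usage sketch of DotDict (hypothetical keys):
#   cfg = DotDict({'arch_name': 'efnv1b7', 'pool': 'gem'})
#   cfg.arch_name  -> 'efnv1b7'
#   cfg.missing    -> None (dict.get semantics: no AttributeError, no default)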
def get_cfg(rst_file):
    "Load the model config saved as JSON alongside the .h5 restart file."
    json_file = str(rst_file).replace('.h5', '_config.json')
    with open(json_file) as f:
        config_dict = json.load(f)
    return DotDict(config_dict)
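# Example of the file-name convention (hypothetical path):
#   get_cfg('models/efnv1b7_colab216.h5') reads 'models/efnv1b7_colab216_config.json'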
def get_embeddings(img, embed_model):
    "Return the embedding of a single image, shape [1, embedding_size]."
    inp = img[None, ...]  # add batch dimension
    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4, use_multiprocessing=True)
    return embeddings
# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook and the valid fold is different for each ensemble model.
FOLDS = 10
shards, n_total = [], 0
for fold in range(FOLDS):
    # the first 3 folds hold one extra image: 51033 = 3 * 5104 + 7 * 5103
    n_img = 5104 if fold <= 2 else 5103
    shards.append(list(range(n_total, n_total + n_img)))
    n_total += n_img
assert n_total == 51033
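# Resulting shard layout: shards[0] == [0, ..., 5103], shards[1] == [5104, ..., 10207], ...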
def get_train_idx(use_fold):
    "Return embedding index that restores the order of images in the tfrec files."
    train_folds = [i for i in range(FOLDS) if i != use_fold]
    valid_folds = [use_fold]
    folds = train_folds + valid_folds  # order in which the embeddings were saved (train + valid)
    train_idx = np.concatenate([shards[fold] for fold in folds])
    return np.argsort(train_idx)
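# Worked example (use_fold=4): embeddings were saved in fold order
# [0, 1, 2, 3, 5, 6, 7, 8, 9, 4], i.e. the valid fold comes last. argsort of
# the concatenated original indices maps each tfrec position back to its row
# in the saved embedding array, restoring tfrec order.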
use_fold = {
'efnv1b7_colab216_emb.npz': 4,
'efnv1b7_colab225_emb.npz': 1,
'efnv1b7_colab197_emb.npz': 0,
'efnv1b7_colab227_emb.npz': 5,
'efnv1b7_v72_emb.npz': 6,
'efnv1b7_colab229_emb.npz': 9,
'efnv1b6_colab217_emb.npz': 5,
'efnv1b6_colab218_emb.npz': 6,
'hub_efnv2xl_colab221_emb.npz': 8,
'hub_efnv2xl_v69_emb.npz': 2,
'hub_efnv2xl_v73_emb.npz': 0,
'efnv1b6_colab226_emb.npz': 2,
'hub_efnv2l_v70_emb.npz': 3,
'hub_efnv2l_colab200_emb.npz': 2,
'hub_efnv2l_colab199_emb.npz': 1,
'convnext_base_384_in22ft1k_v68_emb.npz': 0,
'convnext_base_384_in22ft1k_colab220_emb.npz': 9,
'convnext_base_384_in22ft1k_colab201_emb.npz': 3, # new
}
def get_comp_embeddings(rst_files):
    "Load embeddings for all competition images, shape [n_images, n_models * embedding_size]."
    # assumes emb_path, n_images, embedding_size are module-level globals set elsewhere in the app
comp_embeddings = []
for rst_file in rst_files:
# Get embeddings for all competition images
npz_file = Path(rst_file.replace('.h5', '_emb.npz')).name
d = np.load(str(Path(emb_path) / npz_file))
comp_train_emb = d['train']
comp_test_emb = d['test']
        # Restore original (tfrec) order of comp_train_emb
comp_train_idx = get_train_idx(use_fold[npz_file])
comp_train_emb = comp_train_emb[comp_train_idx, :]
comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
assert comp_embs.shape == (n_images, embedding_size)
# Normalize embeddings
comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
comp_embs /= comp_embs_norms[:, None]
comp_embeddings.append(comp_embs)
return np.concatenate(comp_embeddings, axis=1)
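# Usage sketch (hypothetical restart files matching the use_fold keys above):
#   comp_emb = get_comp_embeddings(['efnv1b7_colab216.h5', 'hub_efnv2xl_v69.h5'])
#   comp_emb.shape -> (n_images, 2 * embedding_size)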
def get_test_embedding(embed_models, sizes):
    "Embed the test image with each ensemble model, return shape [1, n_models * embedding_size]."
    # assumes img and embedding_size are module-level globals set elsewhere in the app
    test_embedding = []
for embed_model, size in zip(embed_models, sizes):
# Get model input
scaled_img = tf.image.resize(img, size)
scaled_img = tf.cast(scaled_img, tf.float32) / 255.0
#print("test image normalized and resized to", scaled_img.shape[:2])
# Get embedding for test image
test_emb = get_embeddings(scaled_img, embed_model) # shape: [1, embedding_size]
assert test_emb.shape == (1, embedding_size)
# Normalize embeddings
test_emb_norm = np.linalg.norm(test_emb, axis=1)
#print("test_emb norm: ", test_emb_norm[0])
test_emb /= test_emb_norm[:, None]
test_embedding.append(test_emb)
    return np.concatenate(test_embedding, axis=1)  # [1, n_models * embedding_size]
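# Usage sketch (hypothetical model/size pairs, one per ensemble member):
#   test_emb = get_test_embedding([embed_model_a, embed_model_b], [(384, 384), (512, 512)])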
class ArcMarginProductSubCenter(tf.keras.layers.Layer):
'''
Implements large margin arc distance.
References:
https://arxiv.org/pdf/1801.07698.pdf
https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/
Sub-center version:
for k > 1, the embedding layer can learn k sub-centers per class
'''
def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False,
ls_eps=0.0, **kwargs):
super(ArcMarginProductSubCenter, self).__init__(**kwargs)
self.n_classes = n_classes
self.s = s
self.m = m
self.k = k
self.ls_eps = ls_eps
self.easy_margin = easy_margin
        # additive angular margin: cos(theta + m) = cos(theta) * cos_m - sin(theta) * sin_m
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # threshold and fallback used when theta + m would exceed pi
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m
def get_config(self):
config = super().get_config().copy()
config.update({
'n_classes': self.n_classes,
's': self.s,
'm': self.m,
'k': self.k,
'ls_eps': self.ls_eps,
'easy_margin': self.easy_margin,
})
return config
def build(self, input_shape):
super(ArcMarginProductSubCenter, self).build(input_shape[0])
self.W = self.add_weight(
name='W',
shape=(int(input_shape[0][-1]), self.n_classes * self.k),
initializer='glorot_uniform',
dtype='float32',
trainable=True)
def call(self, inputs):
X, y = inputs
y = tf.cast(y, dtype=tf.int32)
cosine_all = tf.matmul(
tf.math.l2_normalize(X, axis=1),
tf.math.l2_normalize(self.W, axis=0)
)
        if self.k > 1:
            # sub-center ArcFace: keep only the most similar of the k centers per class
            cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
            cosine = tf.math.reduce_max(cosine_all, axis=2)
else:
cosine = cosine_all
sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = tf.where(cosine > 0, phi, cosine)
else:
phi = tf.where(cosine > self.th, phi, cosine - self.mm)
one_hot = tf.cast(
tf.one_hot(y, depth=self.n_classes),
dtype=cosine.dtype
)
if self.ls_eps > 0:
one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
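
# Minimal eager sketch of the head (hypothetical shapes and labels): 4 embeddings
# of size 8 scored against 5 classes with k=2 sub-centers each. Defined but not
# called on import; run it manually to sanity-check the layer.
def _demo_arc_margin():
    layer = ArcMarginProductSubCenter(n_classes=5, s=30, m=0.3, k=2)
    X = tf.random.normal([4, 8])   # batch of 4 un-normalized embeddings
    y = tf.constant([0, 1, 2, 3])  # integer class labels
    return layer([X, y])           # margin-adjusted, scaled logits, shape [4, 5]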
TFHUB = {
'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
}
def get_model(cfg):
    aux_arcface = False  # optional 2nd arcface head for species, suggested by Chris Deotte
    if cfg.head == 'arcface2':
        head = ArcMarginPenaltyLogists  # external implementation, assumed defined elsewhere
    elif cfg.head == 'arcface':
        head = ArcMarginProductSubCenter
    elif cfg.head == 'addface':
        head = AddMarginProductSubCenter  # additive-margin variant, assumed defined elsewhere
    else:
        raise ValueError(f"invalid head: {cfg.head}")
    if cfg.adaptive_margin:
        # define adaptive margins depending on class frequencies (dynamic margins):
        # rare individuals get margins near margin_max, frequent ones near margin_min
        df = pd.read_csv(f'{project_dir}/train.csv')
        fewness = df['individual_id'].value_counts().sort_index() ** (-1 / 4)
        fewness -= fewness.min()
        fewness /= fewness.max() - fewness.min()
        adaptive_margin = cfg.margin_min + fewness * (cfg.margin_max - cfg.margin_min)
# align margins with targets
splits_path = '/kaggle/input/happywhale-splits'
with open (f'{splits_path}/individual_ids.json', "r") as f:
target_encodings = json.loads(f.read()) # individual_id: index
individual_ids = pd.Series(target_encodings).sort_values().index.values
adaptive_margin = adaptive_margin.loc[individual_ids].values.astype(np.float32)
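    # Worked example of the adaptive margin (hypothetical counts): an individual with
    # a single image has count ** (-1/4) == 1.0 -> fewness 1.0 -> margin == margin_max;
    # the most frequent individual gets fewness 0.0 -> margin == margin_min.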
if cfg.arch_name.startswith('efnv1'):
EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
if cfg.arch_name.startswith('efnv2'):
EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}
    # strategy is assumed to be a tf.distribute strategy defined elsewhere in the app
    with strategy.scope():
margin = head(
n_classes = cfg.N_CLASSES,
s = 30,
m = adaptive_margin if cfg.adaptive_margin else 0.3,
k = cfg.subcenters or 1,
easy_margin = False,
name=f'head/{cfg.head}',
dtype='float32')
inp = tf.keras.layers.Input(shape = [*cfg.IMAGE_SIZE, 3], name = 'inp1')
label = tf.keras.layers.Input(shape = (), name = 'inp2')
if aux_arcface:
label2 = tf.keras.layers.Input(shape = (), name = 'inp3')
        if cfg.arch_name.startswith(('efnv1', 'efnv2')):
            if cfg.arch_name.startswith('efnv1'):
                x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
            else:
                x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
                                       pretrained=cfg.pretrained)(inp)
            if cfg.pool == 'flatten':
                embed = tf.keras.layers.Flatten()(x)
            elif cfg.pool == 'fc':
                embed = tf.keras.layers.Flatten()(x)
                embed = tf.keras.layers.Dropout(0.1)(embed)
                embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'gem':
                embed = GeMPoolingLayer(train_p=True)(x)  # GeMPoolingLayer assumed defined elsewhere
            elif cfg.pool == 'concat':
                # concatenate average- and max-pooled features
                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                     tf.keras.layers.GlobalMaxPooling2D()(x)])
            elif cfg.pool == 'max':
                embed = tf.keras.layers.GlobalMaxPooling2D()(x)
            else:
                embed = tf.keras.layers.GlobalAveragePooling2D()(x)
elif cfg.arch_name in TFHUB:
# tfhub models cannot be modified => Pooling cannot be changed!
url = TFHUB[cfg.arch_name]
model = hub.KerasLayer(url, trainable=True)
embed = model(inp)
#print(f"{cfg.arch_name} from tfhub")
assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
#print(f"{cfg.arch_name} from tfimm")
#embed = tfimm.create_model(cfg.arch_name, pretrained="timm", nb_classes=0)(inp)
embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
# create_model(nb_classes=0) includes pooling as last layer
if len(cfg.dropout_ps) > 0:
# Chris Deotte posted model code without Dropout/FC1 after pooling
embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
embed = tf.keras.layers.Dense(1024)(embed) # tunable embedding size
embed = tf.keras.layers.BatchNormalization()(embed) # missing in public notebooks
x = margin([embed, label])
output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)
        if cfg.aux_loss and aux_arcface:
            # Use 2nd arcface head for species (aux loss)
            head2 = ArcMarginProductSubCenter
            margin2 = head2(
                n_classes = cfg.n_species,
                s = 30,
                m = 0.3,
                k = 1,
                easy_margin = False,
                name=f'auxhead/{cfg.head}',
                dtype='float32')
            aux_features = margin2([embed, label2])
            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
elif cfg.aux_loss:
aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
outputs = (output, aux_output) if cfg.aux_loss else [output]
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)
        opt = tf.keras.optimizers.Adam(learning_rate=cfg.LR)  # currently unused: model is not compiled here
        if cfg.FREEZE_BATCH_NORM:
            freeze_BN(model)  # freeze_BN assumed defined elsewhere in the app
    return model, embed_model
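
# Usage sketch (hypothetical restart file; get_cfg supplies the DotDict config):
#   cfg = get_cfg('models/efnv1b7_colab216.h5')
#   model, embed_model = get_model(cfg)
#   model.load_weights('models/efnv1b7_colab216.h5')
#   embed_model then feeds get_embeddings() / get_test_embedding() above.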