# happywhale-demo / utils.py
import math
import json
import numpy as np
import tensorflow as tf
import tfimm
import efficientnet.tfkeras as efnv1
import keras_efficientnet_v2 as efnv2
import tensorflow_hub as hub
embedding_size = 1024  # per-model embedding dimension
n_images = 51033 + 27956  # competition train + test images
class DotDict(dict):
"""dot.notation access to dictionary attributes
Reference:
https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
"""
__getattr__ = dict.get # returns None if missing key, don't use getattr() with default!
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
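# Example of DotDict behavior (illustrative): DotDict({'a': 1}).a == 1,
# while a missing key, e.g. DotDict({}).b, silently returns None.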
def get_cfg(json_file):
    "Load a JSON config file and return it as a DotDict."
    with open(str(json_file)) as f:
        config_dict = json.load(f)
    return DotDict(config_dict)
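# Illustrative usage, assuming a hypothetical config file name:
#   cfg = get_cfg('config.json')
#   cfg.arch_name  # dot access via DotDict; None if the key is absent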
def get_embeddings(img, embed_model):
    "Return the embedding of a single image, shape [1, embedding_size]."
    inp = img[None, ...]  # add batch dimension
    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4, use_multiprocessing=True)
    return embeddings
# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook and the valid fold is different for each ensemble model.
FOLDS = 10
shards, n_total = [], 0
for fold in range(FOLDS):
    n_img = 5104 if fold <= 2 else 5103  # 3 * 5104 + 7 * 5103 == 51033
shards.append(list(range(n_total, n_total + n_img)))
n_total += n_img
assert n_total == 51033
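# Resulting layout (follows from the arithmetic above): shards[0] == [0, ..., 5103],
# shards[1] == [5104, ..., 10207], ..., shards[9] == [45930, ..., 51032].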
def get_train_idx(use_fold):
"Return embedding index that restores the order of images in the tfrec files."
    train_folds = [i for i in range(FOLDS) if i != use_fold]
    valid_folds = [i for i in range(FOLDS) if i == use_fold]
folds = train_folds + valid_folds
# order of saved embeddings (train + valid)
train_idx = []
for fold in folds:
train_idx.append(shards[fold])
train_idx = np.concatenate(train_idx)
return np.argsort(train_idx)
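# Example: for use_fold=0 the embeddings were saved as shards 1..9 followed by shard 0,
# so get_train_idx(0)[k] is the saved row holding tfrec image k, and
# emb[get_train_idx(0)] restores tfrec order.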
def get_comp_embeddings(emb_files, use_folds):
"Load embeddings for competition images [n_images, embedding_size]"
comp_embeddings = []
for npz_file, use_fold in zip(emb_files, use_folds):
# Get embeddings for all competition images
d = np.load(str(npz_file))
comp_train_emb = d['train']
comp_test_emb = d['test']
        # Restore the original (tfrec) order of comp_train_emb
comp_train_idx = get_train_idx(use_fold)
comp_train_emb = comp_train_emb[comp_train_idx, :]
comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
assert comp_embs.shape == (n_images, embedding_size)
# Normalize embeddings
comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
comp_embs /= comp_embs_norms[:, None]
comp_embeddings.append(comp_embs)
return np.concatenate(comp_embeddings, axis=1)
def get_test_embedding(image, embed_models, sizes):
test_embedding = []
for embed_model, size in zip(embed_models, sizes):
# Get model input
scaled_image = tf.image.resize(image, size)
scaled_image = tf.cast(scaled_image, tf.float32) / 255.0
# Get embedding for test image
test_emb = get_embeddings(scaled_image, embed_model) # shape: [1, embedding_size]
assert test_emb.shape == (1, embedding_size)
# Normalize embeddings
test_emb_norm = np.linalg.norm(test_emb, axis=1)
test_emb /= test_emb_norm[:, None]
test_embedding.append(test_emb)
    return np.concatenate(test_embedding, axis=1)  # [1, n_models * embedding_size]
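# Example: with a 3-model ensemble, the returned embedding has shape [1, 3 * 1024],
# column-aligned with the output of get_comp_embeddings for the same models.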
def p2logit(x):
    "Map a probability in (0, 1) to its logit (inverse of the sigmoid)."
    return np.log(x / (1 - x))
def sigmoid(x):
    "Logistic function, the inverse of p2logit."
    return 1 / (1 + np.exp(-x))
def get_confidence(similarity, threshold):
"Calculate confidence in known/unknown prediction"
if similarity <= 0:
return 0
logit_sim = p2logit(similarity)
logit_threshold = p2logit(threshold)
return sigmoid(abs(logit_sim - logit_threshold))
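# Worked example: get_confidence(0.9, 0.5) -> p2logit(0.9) ≈ 2.197, p2logit(0.5) = 0,
# sigmoid(|2.197 - 0|) ≈ 0.9; a similarity exactly at the threshold yields the
# minimum confidence sigmoid(0) = 0.5.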
class ArcMarginProductSubCenter(tf.keras.layers.Layer):
'''
Implements large margin arc distance.
References:
https://arxiv.org/pdf/1801.07698.pdf
https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/
Sub-center version:
for k > 1, the embedding layer can learn k sub-centers per class
'''
def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False,
ls_eps=0.0, **kwargs):
        super().__init__(**kwargs)
self.n_classes = n_classes
self.s = s
self.m = m
self.k = k
self.ls_eps = ls_eps
self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)      # below this cosine, the margin cannot be applied
        self.mm = tf.math.sin(math.pi - m) * m  # linear penalty used instead
def get_config(self):
config = super().get_config().copy()
config.update({
'n_classes': self.n_classes,
's': self.s,
'm': self.m,
'k': self.k,
'ls_eps': self.ls_eps,
'easy_margin': self.easy_margin,
})
return config
def build(self, input_shape):
        super().build(input_shape[0])
self.W = self.add_weight(
name='W',
shape=(int(input_shape[0][-1]), self.n_classes * self.k),
initializer='glorot_uniform',
dtype='float32',
trainable=True)
def call(self, inputs):
        X, y = inputs  # X: embeddings [batch, features], y: integer class labels [batch]
y = tf.cast(y, dtype=tf.int32)
cosine_all = tf.matmul(
tf.math.l2_normalize(X, axis=1),
tf.math.l2_normalize(self.W, axis=0)
)
if self.k > 1:
cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
cosine = tf.math.reduce_max(cosine_all, axis=2)
else:
cosine = cosine_all
        sine = tf.math.sqrt(tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0))  # clip guards against NaN from rounding
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = tf.where(cosine > 0, phi, cosine)
else:
phi = tf.where(cosine > self.th, phi, cosine - self.mm)
one_hot = tf.cast(
tf.one_hot(y, depth=self.n_classes),
dtype=cosine.dtype
)
if self.ls_eps > 0:
one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
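# Minimal usage sketch (hypothetical shapes and class count, not from this repo):
#   margin = ArcMarginProductSubCenter(n_classes=100, s=30, m=0.3, k=3)
#   logits = margin([embeddings, labels])  # embeddings: [batch, dim] float, labels: [batch] int
#   probs = tf.nn.softmax(logits)          # get_model below applies Softmax the same way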
TFHUB = {
'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
}
def get_model(cfg):
    aux_arcface = False  # optional extra label input (suggested by Chris Deotte); disabled here
if cfg.head == 'arcface':
head = ArcMarginProductSubCenter
    else:
        raise ValueError(f"invalid head: {cfg.head}")
if cfg.adaptive_margin:
raise NotImplementedError
if cfg.arch_name.startswith('efnv1'):
EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
if cfg.arch_name.startswith('efnv2'):
EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}
with tf.distribute.get_strategy().scope():
margin = head(
n_classes=cfg.N_CLASSES,
s=30,
m=0.3,
k=cfg.subcenters or 1,
easy_margin=False,
name=f'head/{cfg.head}',
dtype='float32')
inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1')
label = tf.keras.layers.Input(shape=(), name='inp2')
if aux_arcface:
label2 = tf.keras.layers.Input(shape=(), name='inp3')
if cfg.arch_name.startswith('efnv1'):
x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
if cfg.pool == 'flatten':
embed = tf.keras.layers.Flatten()(x)
elif cfg.pool == 'fc':
embed = tf.keras.layers.Flatten()(x)
embed = tf.keras.layers.Dropout(0.1)(embed)
embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'concat':
                # concat-pool: average- and max-pooled features, concatenated
                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                     tf.keras.layers.GlobalMaxPooling2D()(x)])
elif cfg.pool == 'max':
embed = tf.keras.layers.GlobalMaxPooling2D()(x)
else:
embed = tf.keras.layers.GlobalAveragePooling2D()(x)
elif cfg.arch_name.startswith('efnv2'):
x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
pretrained=cfg.pretrained)(inp)
if cfg.pool == 'flatten':
embed = tf.keras.layers.Flatten()(x)
elif cfg.pool == 'fc':
embed = tf.keras.layers.Flatten()(x)
embed = tf.keras.layers.Dropout(0.1)(embed)
embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'concat':
                # concat-pool: average- and max-pooled features, concatenated
                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                     tf.keras.layers.GlobalMaxPooling2D()(x)])
elif cfg.pool == 'max':
embed = tf.keras.layers.GlobalMaxPooling2D()(x)
else:
embed = tf.keras.layers.GlobalAveragePooling2D()(x)
elif cfg.arch_name in TFHUB:
# tfhub models cannot be modified => Pooling cannot be changed!
url = TFHUB[cfg.arch_name]
model = hub.KerasLayer(url, trainable=True)
embed = model(inp)
assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
if len(cfg.dropout_ps) > 0:
# Chris Deotte posted model code without Dropout/FC1 after pooling
embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
embed = tf.keras.layers.Dense(1024)(embed) # tunable embedding size
embed = tf.keras.layers.BatchNormalization()(embed) # missing in public notebooks
x = margin([embed, label])
output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)
if cfg.aux_loss:
aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
outputs = (output, aux_output) if cfg.aux_loss else [output]
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)
if cfg.FREEZE_BATCH_NORM:
raise NotImplementedError
return model, embed_model
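# End-to-end sketch (hypothetical config path; cfg must provide head, arch_name, pool,
# pretrained, N_CLASSES, IMAGE_SIZE, subcenters, dropout_ps, aux_loss, adaptive_margin,
# FREEZE_BATCH_NORM, and n_species if aux_loss is set, as read above):
#   cfg = get_cfg('config.json')
#   model, embed_model = get_model(cfg)
#   emb = get_test_embedding(image, [embed_model], [cfg.IMAGE_SIZE])  # [1, 1024]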