import json
import math
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import tfimm
import efficientnet
import efficientnet.tfkeras as efnv1
import keras_efficientnet_v2 as efnv2
import tensorflow_hub as hub


class DotDict(dict):
    """dot.notation access to dictionary attributes

    Reference:
    https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
    """
    __getattr__ = dict.get  # returns None if missing key, don't use getattr() with default!
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


def get_cfg(rst_file):
    "Load the training config that was saved alongside the checkpoint."
    json_file = str(rst_file).replace('.h5', '_config.json')
    with open(json_file) as f:
        config_dict = json.load(f)
    return DotDict(config_dict)


def get_embeddings(img, embed_model):
    "Return the embedding of a single image, shape [1, embedding_size]."
    inp = img[None, ...]
    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4,
                                     use_multiprocessing=True)
    return embeddings


# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook and the valid fold is different for each ensemble model.
FOLDS = 10
shards, n_total = [], 0
for fold in range(10):
    n_img = 5104 if fold <= 2 else 5103  # the first three shards hold one extra image
    shards.append(list(range(n_total, n_total + n_img)))
    n_total += n_img
assert n_total == 51033


def get_train_idx(use_fold):
    "Return embedding index that restores the order of images in the tfrec files."
    train_folds = [i for i in range(10) if i % FOLDS != use_fold]
    valid_folds = [i for i in range(10) if i % FOLDS == use_fold]
    folds = train_folds + valid_folds  # order of saved embeddings (train + valid)

    train_idx = []
    for fold in folds:
        train_idx.append(shards[fold])
    train_idx = np.concatenate(train_idx)

    return np.argsort(train_idx)
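
# Illustrative sanity check (not from the pipeline itself): embeddings saved in
# (train folds, valid fold) order, when indexed with get_train_idx(), come back
# in plain shard order 0..n_total-1.
_saved_order = np.concatenate([shards[f] for f in range(10) if f != 3] + [shards[3]])
assert (_saved_order[get_train_idx(3)] == np.arange(n_total)).all()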

use_fold = {
    'efnv1b7_colab216_emb.npz': 4,
    'efnv1b7_colab225_emb.npz': 1,
    'efnv1b7_colab197_emb.npz': 0,
    'efnv1b7_colab227_emb.npz': 5,
    'efnv1b7_v72_emb.npz': 6,
    'efnv1b7_colab229_emb.npz': 9,
    'efnv1b6_colab217_emb.npz': 5,
    'efnv1b6_colab218_emb.npz': 6,
    'hub_efnv2xl_colab221_emb.npz': 8,
    'hub_efnv2xl_v69_emb.npz': 2,
    'hub_efnv2xl_v73_emb.npz': 0,
    'efnv1b6_colab226_emb.npz': 2,
    'hub_efnv2l_v70_emb.npz': 3,
    'hub_efnv2l_colab200_emb.npz': 2,
    'hub_efnv2l_colab199_emb.npz': 1,
    'convnext_base_384_in22ft1k_v68_emb.npz': 0,
    'convnext_base_384_in22ft1k_colab220_emb.npz': 9,
    'convnext_base_384_in22ft1k_colab201_emb.npz': 3,  # new
}


def get_comp_embeddings(rst_files):
    "Load embeddings for competition images, shape [n_images, n_models * embedding_size]."
    # Relies on notebook globals: emb_path, n_images, embedding_size
    comp_embeddings = []

    for rst_file in rst_files:
        # Get embeddings for all competition images
        npz_file = Path(rst_file.replace('.h5', '_emb.npz')).name
        d = np.load(str(Path(emb_path) / npz_file))
        comp_train_emb = d['train']
        comp_test_emb = d['test']

        # Restore original order of comp_train_emb, targets (use targets as fingerprint-check)
        comp_train_idx = get_train_idx(use_fold[npz_file])
        comp_train_emb = comp_train_emb[comp_train_idx, :]
        comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
        assert comp_embs.shape == (n_images, embedding_size)

        # Normalize embeddings
        comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
        print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
        comp_embs /= comp_embs_norms[:, None]

        comp_embeddings.append(comp_embs)

    return np.concatenate(comp_embeddings, axis=1)


def get_test_embedding(embed_models, sizes):
    "Embed the test image with each model and concatenate the normalized embeddings."
    # Relies on notebook globals: img (the query image), embedding_size
    test_embedding = []

    for embed_model, size in zip(embed_models, sizes):
        # Get model input
        scaled_img = tf.image.resize(img, size)
        scaled_img = tf.cast(scaled_img, tf.float32) / 255.0
        #print("test image normalized and resized to", scaled_img.shape[:2])

        # Get embedding for test image
        test_emb = get_embeddings(scaled_img, embed_model)  # shape: [1, embedding_size]
        assert test_emb.shape == (1, embedding_size)

        # Normalize embeddings
        test_emb_norm = np.linalg.norm(test_emb, axis=1)
        #print("test_emb norm: ", test_emb_norm[0])
        test_emb /= test_emb_norm[:, None]

        test_embedding.append(test_emb)

    return np.concatenate(test_embedding, axis=1)  # [1, n_models * embedding_size]
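
# Because every per-model segment of the concatenated embeddings is L2-normalized,
# the dot product of two concatenated vectors is the sum of per-model cosine
# similarities, so the neighbour lookup these two functions feed into reduces to
# a matrix product. A minimal sketch (the function name is illustrative, not from
# the original notebook):
def most_similar(comp_embeddings, test_embedding, top_k=5):
    "Return indices and scores of the top_k competition images closest to the test image."
    sims = comp_embeddings @ test_embedding[0]  # [n_images], sum of per-model cosines
    top = np.argsort(sims)[::-1][:top_k]
    return top, sims[top]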

class ArcMarginProductSubCenter(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    References:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
        https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/

    Sub-center version: for k > 1, the embedding layer can learn k sub-centers per class
    '''
    def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False, ls_eps=0.0, **kwargs):
        super(ArcMarginProductSubCenter, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.k = k
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'k': self.k,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        super(ArcMarginProductSubCenter, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes * self.k),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True)

    def call(self, inputs):
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine_all = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        if self.k > 1:
            # Sub-centers: keep only the best-matching sub-center per class
            cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
            cosine = tf.math.reduce_max(cosine_all, axis=2)
        else:
            cosine = cosine_all
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


TFHUB = {
    'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
    'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
    'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
    'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
    'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
    'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
    'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
    'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
    'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
}
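
# `GeMPoolingLayer` is referenced by get_model() below but not defined in this
# snippet. A minimal sketch, assuming the standard generalized-mean (GeM) pooling
# formulation (Radenovic et al. 2018): (mean(x^p))^(1/p) over the spatial
# dimensions, with optionally learnable exponent p (p=1 is average pooling,
# p -> inf approaches max pooling).
class GeMPoolingLayer(tf.keras.layers.Layer):
    def __init__(self, p=3., train_p=False, eps=1e-6, **kwargs):
        super().__init__(**kwargs)
        self.init_p = p
        self.train_p = train_p
        self.eps = eps

    def build(self, input_shape):
        self.p = self.add_weight(name='p', shape=[1],
                                 initializer=tf.keras.initializers.Constant(self.init_p),
                                 trainable=self.train_p)
        super().build(input_shape)

    def call(self, inputs):
        # Clamp activations away from zero so x**p stays well-defined
        x = tf.maximum(inputs, self.eps)
        x = tf.pow(x, self.p)
        x = tf.reduce_mean(x, axis=[1, 2])  # pool over height and width
        return tf.pow(x, 1. / self.p)

    def get_config(self):
        config = super().get_config()
        config.update({'p': self.init_p, 'train_p': self.train_p, 'eps': self.eps})
        return config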

def get_model(cfg):
    aux_arcface = False  # Chris Deotte suggested this

    if cfg.head == 'arcface2':
        head = ArcMarginPenaltyLogists       # defined elsewhere in the notebook
    elif cfg.head == 'arcface':
        head = ArcMarginProductSubCenter
    elif cfg.head == 'addface':
        head = AddMarginProductSubCenter     # defined elsewhere in the notebook
    else:
        assert False, "INVALID HEAD"

    if cfg.adaptive_margin:
        # Define adaptive margins depending on class frequencies (dynamic margins):
        # rare individuals get a larger margin than frequent ones.
        df = pd.read_csv(f'{project_dir}/train.csv')
        fewness = df['individual_id'].value_counts().sort_index() ** (-1/4)
        fewness -= fewness.min()
        fewness /= fewness.max() - fewness.min()
        adaptive_margin = cfg.margin_min + fewness * (cfg.margin_max - cfg.margin_min)

        # Align margins with targets
        splits_path = '/kaggle/input/happywhale-splits'
        with open(f'{splits_path}/individual_ids.json', "r") as f:
            target_encodings = json.loads(f.read())  # individual_id: index
        individual_ids = pd.Series(target_encodings).sort_values().index.values
        adaptive_margin = adaptive_margin.loc[individual_ids].values.astype(np.float32)

    if cfg.arch_name.startswith('efnv1'):
        EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
               'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
               'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
               'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
    if cfg.arch_name.startswith('efnv2'):
        EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
               'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}

    def pool_features(x):
        "Turn the backbone feature map into an embedding vector according to cfg.pool."
        if cfg.pool == 'flatten':
            return tf.keras.layers.Flatten()(x)
        if cfg.pool == 'fc':
            x = tf.keras.layers.Flatten()(x)
            x = tf.keras.layers.Dropout(0.1)(x)
            return tf.keras.layers.Dense(1024)(x)
        if cfg.pool == 'gem':
            return GeMPoolingLayer(train_p=True)(x)
        if cfg.pool == 'concat':
            # concatenate average- and max-pooled features
            return tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
                                                tf.keras.layers.GlobalMaxPooling2D()(x)])
        if cfg.pool == 'max':
            return tf.keras.layers.GlobalMaxPooling2D()(x)
        return tf.keras.layers.GlobalAveragePooling2D()(x)

    with strategy.scope():
        margin = head(
            n_classes=cfg.N_CLASSES,
            s=30,
            m=adaptive_margin if cfg.adaptive_margin else 0.3,
            k=cfg.subcenters or 1,
            easy_margin=False,
            name=f'head/{cfg.head}',
            dtype='float32')

        inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1')
        label = tf.keras.layers.Input(shape=(), name='inp2')
        if aux_arcface:
            label2 = tf.keras.layers.Input(shape=(), name='inp3')

        if cfg.arch_name.startswith('efnv1'):
            x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
            embed = pool_features(x)
        elif cfg.arch_name.startswith('efnv2'):
            x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
                                   pretrained=cfg.pretrained)(inp)
            embed = pool_features(x)
        elif cfg.arch_name in TFHUB:
            # tfhub models cannot be modified => pooling cannot be changed!
            url = TFHUB[cfg.arch_name]
            model = hub.KerasLayer(url, trainable=True)
            embed = model(inp)
            #print(f"{cfg.arch_name} from tfhub")
            assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
        elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
            #print(f"{cfg.arch_name} from tfimm")
            #embed = tfimm.create_model(cfg.arch_name, pretrained="timm", nb_classes=0)(inp)
            embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
            # create_model(nb_classes=0) includes pooling as last layer

        if len(cfg.dropout_ps) > 0:
            # Chris Deotte posted model code without Dropout/FC1 after pooling
            embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
            embed = tf.keras.layers.Dense(1024)(embed)            # tunable embedding size
            embed = tf.keras.layers.BatchNormalization()(embed)   # missing in public notebooks

        x = margin([embed, label])
        output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)

        if cfg.aux_loss and aux_arcface:
            # Use 2nd arcface head for species (aux loss)
            head2 = ArcMarginProductSubCenter
            margin2 = head2(
                n_classes=cfg.n_species,
                s=30,
                m=0.3,
                k=1,
                easy_margin=False,
                name=f'auxhead/{cfg.head}',
                dtype='float32')
            aux_features = margin2([embed, label2])
            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
        elif cfg.aux_loss:
            aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)

        inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
        outputs = (output, aux_output) if cfg.aux_loss else [output]

        model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
        embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)

        opt = tf.keras.optimizers.Adam(learning_rate=cfg.LR)
        if cfg.FREEZE_BATCH_NORM:
            freeze_BN(model)

    return model, embed_model
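
# `freeze_BN` is called by get_model() above but not defined in this snippet.
# A minimal sketch, assuming it simply freezes all batch-norm layers (the usual
# trick when fine-tuning with small batch sizes):
def freeze_BN(model):
    "Set all BatchNormalization layers to non-trainable."
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False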