Commit b9b435f
Parent(s): fa37760

initial version with models, embeddings

Files changed:
- app.py +79 -20
- requirements.txt +9 -0
- utils.py +365 -0
app.py
CHANGED
@@ -1,41 +1,100 @@
+# If TF version is not understood by tfimm requirements, try this:
+#try:
+#    import tfimm
+#except ModuleNotFoundError:
+#    !pip install --no-deps tfimm timm
+#    import timm
+#    import tfimm
+
 import os
+import glob
 from pathlib import Path
 from subprocess import run
+import json
 
 import gradio as gr
+from yolov5 import detect
+from utils import get_models, get_cfg, get_embeddings, get_comp_embeddings, get_test_embedding
 
 
+# YOLOv5 parameters
 yolo_input_size = 384
 versions = ('2_v108', '4_v109', '0_int6', '1_v110', '3_v111')
-
 score_thr = 0.025
-zoom_score_thr = 0.35
 iou_thr = 0.6
 max_det = 1
-yolo_ens = 'fast' # fast, val, detect, detect_internal, all
-output_size = (512, 512)
-bs = 1 #128 if 'CUDA_VERSION' in os.environ else 16
-
-project_dir = None
 working = Path(os.getcwd())
 modelbox = working / 'models'
 checkpoint_files = [modelbox / f'yolov5_l6_{yolo_input_size}_fold{x}.pt' for x in versions]
-image_root = working / 'images'
+image_root = working / 'images'
+
+
+# Individual identifier parameters
+embedding_size = 1024
+n_images = 51033 + 27956
+max_distance = 0.865
+normalize_similarity = None # test-train, None
+gamma = 0.4
+threshold = 0.09951 if (normalize_similarity == 'test-train') else 0.6 # 0.381
+knn = 300
+emb_path = '/kaggle/input/happywhale-embeddings'
+rst_path = '/kaggle/input/happywhale-models'
+rst_files = sorted(glob(f'{rst_path}/*.h5'))
+n_models = len(rst_files)
+
+
+def fast_yolo_crop(image):
+    !rm -rf {working}/labels {working}/results_ensemble
+    #%cd {working}/yolov5
+    %cd {working}
+    mpimg.imsave(yolo_source, image)
+
+    #print(f"\nInference on best {len(checkpoint_files[5:])} models with detect.py ...")
+    detect.run(weights=checkpoint_files[4:],
+               source=yolo_source,
+               data='data/dataset.yaml',
+               imgsz=yolo_input_size,
+               conf_thres=score_thr,
+               iou_thres=iou_thr,
+               max_det=max_det,
+               save_txt=False,
+               save_conf=False,
+               save_crop=True,
+               exist_ok=True,
+               name=str(working / 'results_ensemble'))
+
+    #print(f"YOLOv5 inference finished in {(perf_counter() - t0) / 60:.2f} min")
+    cropped = sorted(glob(f'{working}/results_ensemble/crops/*/{Path(yolo_source).name}'))
+    assert len(cropped) == 1, f'{len(cropped)} maritime species detected'
+    cropped = cropped[0]
+    species = Path(cropped).parent.name
+    cropped_image = mpimg.imread(cropped)
+    return cropped_image, species.replace('_', ' ')
+
 
-
-
-'https://upload.wikimedia.org/wikipedia/commons/c/c5/Common_Dolphin.jpg',
-'https://upload.wikimedia.org/wikipedia/commons/b/b8/Beluga847.jpg',
-'https://upload.wikimedia.org/wikipedia/commons/e/ea/Beluga_1_1999-07-03.jpg',
-'https://upload.wikimedia.org/wikipedia/commons/2/2b/Whale_Watching_in_Gloucester%2C_Massachusetts_5.jpg',
+# Preload embeddings for known individuals
+comp_embeddings = get_comp_embeddings(rst_files)
 
-
-
-]
-
+# Preload embedding models, input sizes
+K.clear_session()
+embed_models, sizes = [], []
+for rst_file in rst_files:
+    cfg = get_cfg(rst_file)
+    npz_file = Path(rst_file.replace('.h5', '_emb.npz')).name
+    assert cfg.FOLD_TO_RUN == use_fold[npz_file]
+    cfg.pretrained = None # avoid weight downloads
+    if isinstance(cfg.IMAGE_SIZE, int):
+        cfg.IMAGE_SIZE = (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)
+    sizes.append(cfg.IMAGE_SIZE)
+    model, embed_model = get_model(cfg)
+    model.load_weights(rst_file)
+    print(f"\nWeights loaded from {rst_file}")
+    print(f"input_size {scaled_img.shape[:2]}, fold {cfg.FOLD_TO_RUN}, arch {cfg.arch_name}, ",
+          f"DATASET {cfg.DATASET}, dropout_ps {cfg.dropout_ps}, subcenters {cfg.subcenters}")
+    embed_models.append(embed_model)
 
 
-def pred_fn(image, fake=
+def pred_fn(image, fake=False):
     if fake:
         x0, x1 = (int(f * image.shape[0]) for f in (0.2, 0.8))
         y0, y1 = (int(f * image.shape[1]) for f in (0.2, 0.8))
@@ -72,4 +131,4 @@ examples = [str(image_root / f'negative{i:03d}') for i in range(3)]
 
 demo = gr.Interface(fn=pred_fn, inputs="image", outputs=["image", "text"],
                     examples=examples)
-demo.launch()
+demo.launch()
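Review note: the hunk only shows the first lines of the new pred_fn, so the matching logic itself is outside the diff context. Below is a minimal sketch of how the pieces defined above could fit together at prediction time. It is an assumption, not part of this commit: the flow, the threshold handling and the "new individual" label are illustrative, and note that get_test_embedding as committed reads the test image from a module-level img in utils.py rather than taking the crop as an argument.

import numpy as np

def predict_individual(image):
    # 1. Crop the animal with the YOLOv5 ensemble; the crop folder name is the species
    cropped_image, species = fast_yolo_crop(image)

    # 2. Embed the crop with every model, L2-normalized per model (done inside get_test_embedding),
    #    giving a [1, n_models * embedding_size] vector aligned with comp_embeddings
    test_emb = get_test_embedding(embed_models, sizes)

    # 3. Score against all known competition embeddings and keep the knn best matches
    similarities = (comp_embeddings @ test_emb.T).squeeze(1)   # [n_images]
    top_idx = np.argsort(-similarities)[:knn]

    # 4. Below the decision threshold, report a new individual instead of a match
    if similarities[top_idx[0]] < threshold:
        return cropped_image, f'{species}: new individual'
    return cropped_image, f'{species}: closest match index {top_idx[0]}'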
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+torch
+yolov5
+ensemble-boxes
+tensorflow
+tfimm
+timm
+efficientnet
+keras-efficientnet-v2
+tensorflow-hub
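Review note: the commented fallback at the top of app.py uses notebook syntax (!pip ...), which only runs under IPython. A script-safe equivalent of that same fallback might look like this (a sketch, not part of the commit):

import subprocess
import sys

try:
    import tfimm
except ModuleNotFoundError:
    # same --no-deps install suggested in the app.py comment, but callable from a plain script
    subprocess.run([sys.executable, "-m", "pip", "install", "--no-deps", "tfimm", "timm"], check=True)
    import timm
    import tfimm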
utils.py
ADDED
@@ -0,0 +1,365 @@
+import math
+
+import tensorflow as tf
+import tfimm
+import efficientnet
+import efficientnet.tfkeras as efnv1
+import keras_efficientnet_v2 as efnv2
+import tensorflow_hub as hub
+
+
+class DotDict(dict):
+    """dot.notation access to dictionary attributes
+
+    Reference:
+    https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
+    """
+    __getattr__ = dict.get # returns None if missing key, don't use getattr() with default!
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__
+
+
+def get_cfg(rst_file):
+    json_file = str(rst_file).replace('.h5', '_config.json')
+    config_dict = json.load(open(json_file))
+    return DotDict(config_dict)
+
+
+def get_embeddings(img, embed_model):
+    inp = img[None, ...]
+    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4, use_multiprocessing=True)
+    return embeddings
+
+
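Review note: DotDict simply maps attribute access onto dict.get, so reading a missing key yields None instead of raising, and get_cfg wraps each model's *_config.json in it. A short illustration (the values are made up):

cfg = DotDict({'arch_name': 'efnv1b7', 'IMAGE_SIZE': 384})
cfg.arch_name                 # 'efnv1b7'
cfg.pretrained                # None: missing keys read as None rather than raising AttributeError
cfg.pretrained = 'imagenet'   # attribute assignment writes the key into the dict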
+# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
+# in the training notebook and the valid fold is different for each ensemble model.
+FOLDS = 10
+shards, n_total = [], 0
+for fold in range(10):
+    n_img = 5104 if fold <= 2 else 5103
+    shards.append(list(range(n_total, n_total + n_img)))
+    n_total += n_img
+assert n_total == 51033
+
+def get_train_idx(use_fold):
+    "Return embedding index that restores the order of images in the tfrec files."
+    train_folds = [i for i in range(10) if i % FOLDS != use_fold]
+    valid_folds = [i for i in range(10) if i % FOLDS == use_fold]
+    folds = train_folds + valid_folds
+
+    # order of saved embeddings (train + valid)
+    train_idx = []
+    for fold in folds:
+        train_idx.append(shards[fold])
+    train_idx = np.concatenate(train_idx)
+
+    return np.argsort(train_idx)
+
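Review note: a toy illustration of what get_train_idx undoes. With, say, 3 shards of 2 images each and valid fold 1, the embeddings were saved in fold order [0, 2, 1], i.e. original image indices [0, 1, 4, 5, 2, 3]; the argsort of that sequence is the index that puts rows back into tfrec order:

import numpy as np

saved_order = np.array([0, 1, 4, 5, 2, 3])   # image ids in the order the embeddings were written
restore_idx = np.argsort(saved_order)
assert (saved_order[restore_idx] == np.arange(6)).all()   # rows restored to original (tfrec) order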
+use_fold = {
+    'efnv1b7_colab216_emb.npz': 4,
+    'efnv1b7_colab225_emb.npz': 1,
+    'efnv1b7_colab197_emb.npz': 0,
+    'efnv1b7_colab227_emb.npz': 5,
+    'efnv1b7_v72_emb.npz': 6,
+    'efnv1b7_colab229_emb.npz': 9,
+    'efnv1b6_colab217_emb.npz': 5,
+    'efnv1b6_colab218_emb.npz': 6,
+    'hub_efnv2xl_colab221_emb.npz': 8,
+    'hub_efnv2xl_v69_emb.npz': 2,
+    'hub_efnv2xl_v73_emb.npz': 0,
+    'efnv1b6_colab226_emb.npz': 2,
+    'hub_efnv2l_v70_emb.npz': 3,
+    'hub_efnv2l_colab200_emb.npz': 2,
+    'hub_efnv2l_colab199_emb.npz': 1,
+    'convnext_base_384_in22ft1k_v68_emb.npz': 0,
+    'convnext_base_384_in22ft1k_colab220_emb.npz': 9,
+    'convnext_base_384_in22ft1k_colab201_emb.npz': 3, # new
+}
+
+
+def get_comp_embeddings(rst_files):
+    "Load embeddings for competition images [n_images, embedding_size]"
+
+    comp_embeddings = []
+
+    for rst_file in rst_files:
+        # Get embeddings for all competition images
+        npz_file = Path(rst_file.replace('.h5', '_emb.npz')).name
+        d = np.load(str(Path(emb_path) / npz_file))
+        comp_train_emb = d['train']
+        comp_test_emb = d['test']
+
+        # Restore original order of comp_train_emb, targets (use targets as fingerprint-check)
+        comp_train_idx = get_train_idx(use_fold[npz_file])
+        comp_train_emb = comp_train_emb[comp_train_idx, :]
+        comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
+        assert comp_embs.shape == (n_images, embedding_size)
+
+        # Normalize embeddings
+        comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
+        print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
+        comp_embs /= comp_embs_norms[:, None]
+
+        comp_embeddings.append(comp_embs)
+
+    return np.concatenate(comp_embeddings, axis=1)
+
+
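Review note: get_comp_embeddings L2-normalizes each model's block separately and then concatenates along the feature axis, so the dot product between two concatenated vectors is the sum of the per-model cosine similarities (it ranges over [-n_models, n_models], not [-1, 1]). A quick self-contained check of that property:

import numpy as np

rng = np.random.default_rng(0)
blocks_a = [rng.normal(size=8) for _ in range(3)]   # 3 toy 'models', embedding size 8
blocks_b = [rng.normal(size=8) for _ in range(3)]
a = np.concatenate([v / np.linalg.norm(v) for v in blocks_a])
b = np.concatenate([v / np.linalg.norm(v) for v in blocks_b])
per_model_cos = [(u / np.linalg.norm(u)) @ (w / np.linalg.norm(w)) for u, w in zip(blocks_a, blocks_b)]
assert np.isclose(a @ b, sum(per_model_cos))   # dot of concatenated blocks = sum of per-model cosines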
+def get_test_embedding(embed_models, sizes):
+    test_embedding, similarities = [], []
+
+    for embed_model, size in zip(embed_models, sizes):
+        # Get model input
+        scaled_img = tf.image.resize(img, size)
+        scaled_img = tf.cast(scaled_img, tf.float32) / 255.0
+        #print("test image normalized and resized to", scaled_img.shape[:2])
+
+        # Get embedding for test image
+        test_emb = get_embeddings(scaled_img, embed_model) # shape: [1, embedding_size]
+        assert test_emb.shape == (1, embedding_size)
+
+        # Normalize embeddings
+        test_emb_norm = np.linalg.norm(test_emb, axis=1)
+        #print("test_emb norm: ", test_emb_norm[0])
+        test_emb /= test_emb_norm[:, None]
+
+        test_embedding.append(test_emb)
+
+    return np.concatenate(test_embedding, axis=1) # [1, embedding_size]
+
+
+class ArcMarginProductSubCenter(tf.keras.layers.Layer):
+    '''
+    Implements large margin arc distance.
+
+    References:
+        https://arxiv.org/pdf/1801.07698.pdf
+        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
+        https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/
+
+    Sub-center version:
+        for k > 1, the embedding layer can learn k sub-centers per class
+    '''
+    def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False,
+                 ls_eps=0.0, **kwargs):
+
+        super(ArcMarginProductSubCenter, self).__init__(**kwargs)
+
+        self.n_classes = n_classes
+        self.s = s
+        self.m = m
+        self.k = k
+        self.ls_eps = ls_eps
+        self.easy_margin = easy_margin
+        self.cos_m = tf.math.cos(m)
+        self.sin_m = tf.math.sin(m)
+        self.th = tf.math.cos(math.pi - m)
+        self.mm = tf.math.sin(math.pi - m) * m
+
+    def get_config(self):
+
+        config = super().get_config().copy()
+        config.update({
+            'n_classes': self.n_classes,
+            's': self.s,
+            'm': self.m,
+            'k': self.k,
+            'ls_eps': self.ls_eps,
+            'easy_margin': self.easy_margin,
+        })
+        return config
+
+    def build(self, input_shape):
+        super(ArcMarginProductSubCenter, self).build(input_shape[0])
+
+        self.W = self.add_weight(
+            name='W',
+            shape=(int(input_shape[0][-1]), self.n_classes * self.k),
+            initializer='glorot_uniform',
+            dtype='float32',
+            trainable=True)
+
+    def call(self, inputs):
+        X, y = inputs
+        y = tf.cast(y, dtype=tf.int32)
+        cosine_all = tf.matmul(
+            tf.math.l2_normalize(X, axis=1),
+            tf.math.l2_normalize(self.W, axis=0)
+        )
+        if self.k > 1:
+            cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
+            cosine = tf.math.reduce_max(cosine_all, axis=2)
+        else:
+            cosine = cosine_all
+        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
+        phi = cosine * self.cos_m - sine * self.sin_m
+        if self.easy_margin:
+            phi = tf.where(cosine > 0, phi, cosine)
+        else:
+            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
+        one_hot = tf.cast(
+            tf.one_hot(y, depth=self.n_classes),
+            dtype=cosine.dtype
+        )
+        if self.ls_eps > 0:
+            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
+
+        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
+        output *= self.s
+        return output
+
+
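Review note: in call, cosine is cos(theta) between the L2-normalized embedding and each class center (taking the max over the k sub-centers), sine is sin(theta), and phi = cosine*cos_m - sine*sin_m is the angle-addition identity cos(theta + m). With the one-hot mask and the scale s, the returned logits implement the ArcFace objective from the referenced paper:

    logit_j = s * cos(theta_j + m)   if j is the target class y
    logit_j = s * cos(theta_j)       otherwise

easy_margin and the th/mm terms only guard the case where theta + m would leave the monotonic range of the cosine.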
+TFHUB = {
+    'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
+    'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
+    'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
+    'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
+    'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
+    'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
+    'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
+    'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
+    'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
+}
+
+
+def get_model(cfg):
+    aux_arcface = False # Chris Deotte suggested this
+    if cfg.head == 'arcface2':
+        head = ArcMarginPenaltyLogists
+    elif cfg.head == 'arcface':
+        head = ArcMarginProductSubCenter
+    elif cfg.head == 'addface':
+        head = AddMarginProductSubCenter
+    else:
+        assert False, "INVALID HEAD"
+
+    if cfg.adaptive_margin:
+        # define adaptive margins depending on class frequencies (dynamic margins)
+        df = pd.read_csv(f'{project_dir}/train.csv')
+        fewness = df['individual_id'].value_counts().sort_index() ** (-1/4)
+        fewness -= fewness.min()
+        fewness /= fewness.max() - fewness.min()
+        adaptive_margin = cfg.margin_min + fewness * (cfg.margin_max - cfg.margin_min)
+
+        # align margins with targets
+        splits_path = '/kaggle/input/happywhale-splits'
+        with open (f'{splits_path}/individual_ids.json', "r") as f:
+            target_encodings = json.loads(f.read()) # individual_id: index
+        individual_ids = pd.Series(target_encodings).sort_values().index.values
+        adaptive_margin = adaptive_margin.loc[individual_ids].values.astype(np.float32)
+
+    if cfg.arch_name.startswith('efnv1'):
+        EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
+               'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
+               'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
+               'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
+
+    if cfg.arch_name.startswith('efnv2'):
+        EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
+               'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}
+
+
+    with strategy.scope():
+
+        margin = head(
+            n_classes = cfg.N_CLASSES,
+            s = 30,
+            m = adaptive_margin if cfg.adaptive_margin else 0.3,
+            k = cfg.subcenters or 1,
+            easy_margin = False,
+            name=f'head/{cfg.head}',
+            dtype='float32')
+
+        inp = tf.keras.layers.Input(shape = [*cfg.IMAGE_SIZE, 3], name = 'inp1')
+        label = tf.keras.layers.Input(shape = (), name = 'inp2')
+        if aux_arcface:
+            label2 = tf.keras.layers.Input(shape = (), name = 'inp3')
+
+        if cfg.arch_name.startswith('efnv1'):
+            x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
+            if cfg.pool == 'flatten':
+                embed = tf.keras.layers.Flatten()(x)
+            elif cfg.pool == 'fc':
+                embed = tf.keras.layers.Flatten()(x)
+                embed = tf.keras.layers.Dropout(0.1)(embed)
+                embed = tf.keras.layers.Dense(1024)(embed)
+            elif cfg.pool == 'gem':
+                embed = GeMPoolingLayer(train_p=True)(x)
+            elif cfg.pool == 'concat':
+                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
+                                                     tf.keras.layers.GlobalAveragePooling2D()(x)])
+            elif cfg.pool == 'max':
+                embed = tf.keras.layers.GlobalMaxPooling2D()(x)
+            else:
+                embed = tf.keras.layers.GlobalAveragePooling2D()(x)
+
+        elif cfg.arch_name.startswith('efnv2'):
+            x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
+                                   pretrained=cfg.pretrained)(inp)
+            if cfg.pool == 'flatten':
+                embed = tf.keras.layers.Flatten()(x)
+            elif cfg.pool == 'fc':
+                embed = tf.keras.layers.Flatten()(x)
+                embed = tf.keras.layers.Dropout(0.1)(embed)
+                embed = tf.keras.layers.Dense(1024)(embed)
+            elif cfg.pool == 'gem':
+                embed = GeMPoolingLayer(train_p=True)(x)
+            elif cfg.pool == 'concat':
+                embed = tf.keras.layers.concatenate([tf.keras.layers.GlobalAveragePooling2D()(x),
+                                                     tf.keras.layers.GlobalAveragePooling2D()(x)])
+            elif cfg.pool == 'max':
+                embed = tf.keras.layers.GlobalMaxPooling2D()(x)
+            else:
+                embed = tf.keras.layers.GlobalAveragePooling2D()(x)
+
+        elif cfg.arch_name in TFHUB:
+            # tfhub models cannot be modified => Pooling cannot be changed!
+            url = TFHUB[cfg.arch_name]
+            model = hub.KerasLayer(url, trainable=True)
+            embed = model(inp)
+            #print(f"{cfg.arch_name} from tfhub")
+            assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
+
+        elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
+            #print(f"{cfg.arch_name} from tfimm")
+            #embed = tfimm.create_model(cfg.arch_name, pretrained="timm", nb_classes=0)(inp)
+            embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)
+            # create_model(nb_classes=0) includes pooling as last layer
+
+        if len(cfg.dropout_ps) > 0:
+            # Chris Deotte posted model code without Dropout/FC1 after pooling
+            embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
+            embed = tf.keras.layers.Dense(1024)(embed) # tunable embedding size
+            embed = tf.keras.layers.BatchNormalization()(embed) # missing in public notebooks
+        x = margin([embed, label])
+
+        output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)
+
+        if cfg.aux_loss and aux_arcface:
+            # Use 2nd arcface head for species (aux loss)
+            head2 = ArcMarginProductSubCenter
+            margin2 = head(
+                n_classes = cfg.n_species,
+                s = 30,
+                m = 0.3,
+                k = 1,
+                easy_margin = False,
+                name=f'auxhead/{cfg.head}',
+                dtype='float32')
+            aux_features = margin2([embed, label2])
+            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
+
+        elif cfg.aux_loss:
+            aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
+            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)
+        inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
+        outputs = (output, aux_output) if cfg.aux_loss else [output]
+
+        model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
+        embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)
+
+        opt = tf.keras.optimizers.Adam(learning_rate=cfg.LR)
+        if cfg.FREEZE_BATCH_NORM:
+            freeze_BN(model)
+
+        return model, embed_model
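Review note: as committed, utils.py references several names it neither defines nor imports (json, np, pd, strategy, project_dir, GeMPoolingLayer, freeze_BN, ArcMarginPenaltyLogists, AddMarginProductSubCenter), and app.py imports get_models although the file defines get_model. A minimal sketch of what the module would need just to resolve the most common of those names is shown below; the strategy default is an assumption (the original training code presumably created a TPU or mirrored strategy), and the GeM / freeze_BN / alternative-head helpers are only needed for configs that select them.

import json

import numpy as np
import pandas as pd
import tensorflow as tf

# Default (no-op) distribution strategy so `with strategy.scope():` works on single-device setups
strategy = tf.distribute.get_strategy()

project_dir = None   # only read when cfg.adaptive_margin is True (train.csv lookup)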