Spaces:

power2
/

powerswp

Runtime error

App Files Files Community

sanket kheni committed on Dec 16, 2022

Commit

7576d48

1 Parent(s): 6b2b650

.

Browse files

Files changed (4) hide show

app.py +7 -8
retina_model/anchor.py +296 -0
retina_model/models.py +301 -0
retina_model/ops.py +27 -0

app.py CHANGED Viewed

@@ -1,17 +1,16 @@
 import os
-import cv2
-import gradio
 import numpy as np
-from huggingface_hub import Repository
 from scipy.ndimage import gaussian_filter
-from tensorflow.keras.models import load_model
-from tensorflow_addons.layers import InstanceNormalization
-from networks.layers import AdaIN, AdaptiveAttention
 from options.swap_options import SwapOptions
-from utils.utils import (estimate_norm, get_lm, inverse_estimate_norm,
-                         norm_crop, transform_landmark_points)
 # .
 # token = os.environ['model_fetch']

+import gradio
+from huggingface_hub import Repository
 import os
+from utils.utils import norm_crop, estimate_norm, inverse_estimate_norm, transform_landmark_points, get_lm
+from networks.layers import AdaIN, AdaptiveAttention
+from tensorflow_addons.layers import InstanceNormalization
 import numpy as np
+import cv2
 from scipy.ndimage import gaussian_filter
+from tensorflow.keras.models import load_model
 from options.swap_options import SwapOptions
 # .
 # token = os.environ['model_fetch']

retina_model/anchor.py ADDED Viewed

	@@ -0,0 +1,296 @@

+"""Anchor utils modified from https://github.com/biubug6/Pytorch_Retinaface"""
+import math
+import tensorflow as tf
+import numpy as np
+from itertools import product as product
+###############################################################################
+#   Tensorflow / Numpy Priors                                                 #
+###############################################################################
+def prior_box(image_sizes, min_sizes, steps, clip=False):
+    """prior box"""
+    feature_maps = [
+        [math.ceil(image_sizes[0] / step), math.ceil(image_sizes[1] / step)]
+        for step in steps]
+    anchors = []
+    for k, f in enumerate(feature_maps):
+        for i, j in product(range(f[0]), range(f[1])):
+            for min_size in min_sizes[k]:
+                s_kx = min_size / image_sizes[1]
+                s_ky = min_size / image_sizes[0]
+                cx = (j + 0.5) * steps[k] / image_sizes[1]
+                cy = (i + 0.5) * steps[k] / image_sizes[0]
+                anchors += [cx, cy, s_kx, s_ky]
+    output = np.asarray(anchors).reshape([-1, 4])
+    if clip:
+        output = np.clip(output, 0, 1)
+    return output
+def prior_box_tf(image_sizes, min_sizes, steps, clip=False):
+    """prior box"""
+    image_sizes = tf.cast(tf.convert_to_tensor(image_sizes), tf.float32)
+    feature_maps = tf.math.ceil(
+        tf.reshape(image_sizes, [1, 2]) /
+        tf.reshape(tf.cast(steps, tf.float32), [-1, 1]))
+    anchors = []
+    for k in range(len(min_sizes)):
+        grid_x, grid_y = _meshgrid_tf(tf.range(feature_maps[k][1]),
+                                      tf.range(feature_maps[k][0]))
+        cx = (grid_x + 0.5) * steps[k] / image_sizes[1]
+        cy = (grid_y + 0.5) * steps[k] / image_sizes[0]
+        cxcy = tf.stack([cx, cy], axis=-1)
+        cxcy = tf.reshape(cxcy, [-1, 2])
+        cxcy = tf.repeat(cxcy, repeats=tf.shape(min_sizes[k])[0], axis=0)
+        sx = min_sizes[k] / image_sizes[1]
+        sy = min_sizes[k] / image_sizes[0]
+        sxsy = tf.stack([sx, sy], 1)
+        sxsy = tf.repeat(sxsy[tf.newaxis],
+                         repeats=tf.shape(grid_x)[0] * tf.shape(grid_x)[1],
+                         axis=0)
+        sxsy = tf.reshape(sxsy, [-1, 2])
+        anchors.append(tf.concat([cxcy, sxsy], 1))
+    output = tf.concat(anchors, axis=0)
+    if clip:
+        output = tf.clip_by_value(output, 0, 1)
+    return output
+def _meshgrid_tf(x, y):
+    """ workaround solution of the tf.meshgrid() issue:
+        https://github.com/tensorflow/tensorflow/issues/34470"""
+    grid_shape = [tf.shape(y)[0], tf.shape(x)[0]]
+    grid_x = tf.broadcast_to(tf.reshape(x, [1, -1]), grid_shape)
+    grid_y = tf.broadcast_to(tf.reshape(y, [-1, 1]), grid_shape)
+    return grid_x, grid_y
+###############################################################################
+#   Tensorflow Encoding                                                       #
+###############################################################################
+def encode_tf(labels, priors, match_thresh, ignore_thresh,
+              variances=[0.1, 0.2]):
+    """Match ground-truth rows to priors and build per-prior training targets.
+    Args:
+        labels: (tensor) one row per ground truth. Columns 0-3 are read as
+            the box, the last column as the landmark-valid flag and the
+            columns in between as the landmarks. The box is compared with
+            point-form priors, so it is presumably (xmin, ymin, xmax, ymax)
+            -- confirm against the caller.
+        priors: (tensor) prior boxes; cast to float32 and converted with
+            _point_form() for the overlap computation.
+        match_thresh: overlap strictly above which a prior is positive.
+        ignore_thresh: priors whose overlap lies strictly between this and
+            match_thresh are marked ignore. Must be <= match_thresh.
+        variances: (list[float]) forwarded to _encode_bbox/_encode_landm.
+    Return:
+        (tensor) [loc_t, landm_t, landm_valid_t, conf_t] concatenated on
+        axis 1, one row per prior.
+    """
+    assert ignore_thresh <= match_thresh
+    priors = tf.cast(priors, tf.float32)
+    bbox = labels[:, :4]
+    landm = labels[:, 4:-1]
+    landm_valid = labels[:, -1]  # 1: with landm, 0: w/o landm.
+    # jaccard index
+    overlaps = _jaccard(bbox, _point_form(priors))
+    # (Bipartite Matching)
+    # [num_objects] best prior for each ground truth
+    best_prior_overlap, best_prior_idx = tf.math.top_k(overlaps, k=1)
+    best_prior_overlap = best_prior_overlap[:, 0]
+    best_prior_idx = best_prior_idx[:, 0]
+    # [num_priors] best ground truth for each prior
+    overlaps_t = tf.transpose(overlaps)
+    best_truth_overlap, best_truth_idx = tf.math.top_k(overlaps_t, k=1)
+    best_truth_overlap = best_truth_overlap[:, 0]
+    best_truth_idx = best_truth_idx[:, 0]
+    # ensure best prior
+    # For every ground truth i whose best prior overlaps more than
+    # match_thresh, that prior is reassigned to i and its overlap is set to 2
+    # so it passes the positive test below. Later ground truths overwrite
+    # earlier ones when they share the same best prior.
+    def _loop_body(i, bt_idx, bt_overlap):
+        # One-hot mask over the priors selecting ground truth i's best prior.
+        bp_mask = tf.one_hot(best_prior_idx[i], tf.shape(bt_idx)[0])
+        bp_mask_int = tf.cast(bp_mask, tf.int32)
+        new_bt_idx = bt_idx * (1 - bp_mask_int) + bp_mask_int * i
+        bp_mask_float = tf.cast(bp_mask, tf.float32)
+        new_bt_overlap = bt_overlap * (1 - bp_mask_float) + bp_mask_float * 2
+        return tf.cond(best_prior_overlap[i] > match_thresh,
+                       lambda: (i + 1, new_bt_idx, new_bt_overlap),
+                       lambda: (i + 1, bt_idx, bt_overlap))
+    _, best_truth_idx, best_truth_overlap = tf.while_loop(
+        lambda i, bt_idx, bt_overlap: tf.less(i, tf.shape(best_prior_idx)[0]),
+        _loop_body, [tf.constant(0), best_truth_idx, best_truth_overlap])
+    # Pull the matched ground-truth row for every prior.
+    matches_bbox = tf.gather(bbox, best_truth_idx)  # [num_priors, 4]
+    matches_landm = tf.gather(landm, best_truth_idx)  # [num_priors, 10]
+    matches_landm_v = tf.gather(landm_valid, best_truth_idx)  # [num_priors]
+    loc_t = _encode_bbox(matches_bbox, priors, variances)
+    landm_t = _encode_landm(matches_landm, priors, variances)
+    landm_valid_t = tf.cast(matches_landm_v > 0, tf.float32)
+    conf_t = tf.cast(best_truth_overlap > match_thresh, tf.float32)
+    # NOTE(review): both comparisons are strict, so an overlap exactly equal
+    # to match_thresh stays 0 (negative) rather than becoming ignore.
+    conf_t = tf.where(
+        tf.logical_and(best_truth_overlap < match_thresh,
+                       best_truth_overlap > ignore_thresh),
+        tf.ones_like(conf_t) * -1, conf_t)    # 1: pos, 0: neg, -1: ignore
+    return tf.concat([loc_t, landm_t, landm_valid_t[..., tf.newaxis],
+                      conf_t[..., tf.newaxis]], axis=1)
+def _encode_bbox(matched, priors, variances):
+    """Encode the variances from the priorbox layers into the ground truth
+    boxes we have matched (based on jaccard overlap) with the prior boxes.
+    Args:
+        matched: (tensor) Coords of ground truth for each prior in point-form
+            Shape: [num_priors, 4].
+        priors: (tensor) Prior boxes in center-offset form
+            Shape: [num_priors,4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        encoded boxes (tensor), Shape: [num_priors, 4]
+    """
+    # dist b/t match center and prior's center
+    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
+    # encode variance
+    g_cxcy /= (variances[0] * priors[:, 2:])
+    # match wh / prior wh
+    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
+    g_wh = tf.math.log(g_wh) / variances[1]
+    # return target for smooth_l1_loss
+    return tf.concat([g_cxcy, g_wh], 1)  # [num_priors,4]
+def _encode_landm(matched, priors, variances):
+    """Encode the variances from the priorbox layers into the ground truth
+    boxes we have matched (based on jaccard overlap) with the prior boxes.
+    Args:
+        matched: (tensor) Coords of ground truth for each prior in point-form
+            Shape: [num_priors, 10].
+        priors: (tensor) Prior boxes in center-offset form
+            Shape: [num_priors,4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        encoded landm (tensor), Shape: [num_priors, 10]
+    """
+    # dist b/t match center and prior's center
+    matched = tf.reshape(matched, [tf.shape(matched)[0], 5, 2])
+    priors = tf.broadcast_to(
+        tf.expand_dims(priors, 1), [tf.shape(matched)[0], 5, 4])
+    g_cxcy = matched[:, :, :2] - priors[:, :, :2]
+    # encode variance
+    g_cxcy /= (variances[0] * priors[:, :, 2:])
+    # g_cxcy /= priors[:, :, 2:]
+    g_cxcy = tf.reshape(g_cxcy, [tf.shape(g_cxcy)[0], -1])
+    # return target for smooth_l1_loss
+    return g_cxcy
+def _point_form(boxes):
+    """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
+    representation for comparison to point form ground truth data.
+    Args:
+        boxes: (tensor) center-size default boxes from priorbox layers.
+    Return:
+        boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
+    """
+    return tf.concat((boxes[:, :2] - boxes[:, 2:] / 2,
+                      boxes[:, :2] + boxes[:, 2:] / 2), axis=1)
+def _intersect(box_a, box_b):
+    """ We resize both tensors to [A,B,2]:
+    [A,2] -> [A,1,2] -> [A,B,2]
+    [B,2] -> [1,B,2] -> [A,B,2]
+    Then we compute the area of intersect between box_a and box_b.
+    Args:
+      box_a: (tensor) bounding boxes, Shape: [A,4].
+      box_b: (tensor) bounding boxes, Shape: [B,4].
+    Return:
+      (tensor) intersection area, Shape: [A,B].
+    """
+    A = tf.shape(box_a)[0]
+    B = tf.shape(box_b)[0]
+    max_xy = tf.minimum(
+        tf.broadcast_to(tf.expand_dims(box_a[:, 2:], 1), [A, B, 2]),
+        tf.broadcast_to(tf.expand_dims(box_b[:, 2:], 0), [A, B, 2]))
+    min_xy = tf.maximum(
+        tf.broadcast_to(tf.expand_dims(box_a[:, :2], 1), [A, B, 2]),
+        tf.broadcast_to(tf.expand_dims(box_b[:, :2], 0), [A, B, 2]))
+    inter = tf.maximum((max_xy - min_xy), tf.zeros_like(max_xy - min_xy))
+    return inter[:, :, 0] * inter[:, :, 1]
+def _jaccard(box_a, box_b):
+    """Compute the jaccard overlap of two sets of boxes.  The jaccard overlap
+    is simply the intersection over union of two boxes.  Here we operate on
+    ground truth boxes and default boxes.
+    E.g.:
+        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
+    Args:
+        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
+        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
+    Return:
+        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
+    """
+    inter = _intersect(box_a, box_b)
+    area_a = tf.broadcast_to(
+        tf.expand_dims(
+            (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]), 1),
+        tf.shape(inter))  # [A,B]
+    area_b = tf.broadcast_to(
+        tf.expand_dims(
+            (box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1]), 0),
+        tf.shape(inter))  # [A,B]
+    union = area_a + area_b - inter
+    return inter / union  # [A,B]
+###############################################################################
+#   Tensorflow Decoding                                                       #
+###############################################################################
+def decode_tf(labels, priors, variances=[0.1, 0.2]):
+    """tensorflow decoding"""
+    bbox = _decode_bbox(labels[:, :4], priors, variances)
+    landm = _decode_landm(labels[:, 4:14], priors, variances)
+    landm_valid = labels[:, 14][:, tf.newaxis]
+    conf = labels[:, 15][:, tf.newaxis]
+    return tf.concat([bbox, landm, landm_valid, conf], axis=1)
+def _decode_bbox(pre, priors, variances=[0.1, 0.2]):
+    """Decode locations from predictions using priors to undo
+    the encoding we did for offset regression at train time.
+    Args:
+        pre (tensor): location predictions for loc layers,
+            Shape: [num_priors,4]
+        priors (tensor): Prior boxes in center-offset form.
+            Shape: [num_priors,4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        decoded bounding box predictions
+    """
+    centers = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:]
+    sides = priors[:, 2:] * tf.math.exp(pre[:, 2:] * variances[1])
+    return tf.concat([centers - sides / 2, centers + sides / 2], axis=1)
+def _decode_landm(pre, priors, variances=[0.1, 0.2]):
+    """Decode landm from predictions using priors to undo
+    the encoding we did for offset regression at train time.
+    Args:
+        pre (tensor): landm predictions for loc layers,
+            Shape: [num_priors,10]
+        priors (tensor): Prior boxes in center-offset form.
+            Shape: [num_priors,4].
+        variances: (list[float]) Variances of priorboxes
+    Return:
+        decoded landm predictions
+    """
+    landms = tf.concat(
+        [priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+         priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+         priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+         priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+         priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]], axis=1)
+    return landms

retina_model/models.py ADDED Viewed

	@@ -0,0 +1,301 @@

+import tensorflow as tf
+from tensorflow.keras import Model
+from tensorflow.keras.applications import MobileNetV2, ResNet50
+from tensorflow.keras.layers import Input, Conv2D, ReLU, LeakyReLU
+from retinaface.anchor import decode_tf, prior_box_tf
+def _regularizer(weights_decay):
+    """l2 regularizer"""
+    return tf.keras.regularizers.l2(weights_decay)
+def _kernel_init(scale=1.0, seed=None):
+    """He normal initializer"""
+    return tf.keras.initializers.he_normal()
+class BatchNormalization(tf.keras.layers.BatchNormalization):
+    """Make trainable=False freeze BN for real (the og version is sad).
+       ref: https://github.com/zzh8829/yolov3-tf2
+    """
+    def __init__(self, axis=-1, momentum=0.9, epsilon=1e-5, center=True,
+                 scale=True, name=None, **kwargs):
+        super(BatchNormalization, self).__init__(
+            axis=axis, momentum=momentum, epsilon=epsilon, center=center,
+            scale=scale, name=name, **kwargs)
+    def call(self, x, training=False):
+        if training is None:
+            training = tf.constant(False)
+        training = tf.logical_and(training, self.trainable)
+        return super().call(x, training)
+def Backbone(backbone_type='ResNet50', use_pretrain=True):
+    """Backbone Model"""
+    weights = None
+    if use_pretrain:
+        weights = 'imagenet'
+    def backbone(x):
+        if backbone_type == 'ResNet50':
+            extractor = ResNet50(
+                input_shape=x.shape[1:], include_top=False, weights=weights)
+            pick_layer1 = 80  # [80, 80, 512]
+            pick_layer2 = 142  # [40, 40, 1024]
+            pick_layer3 = 174  # [20, 20, 2048]
+            preprocess = tf.keras.applications.resnet.preprocess_input
+        elif backbone_type == 'MobileNetV2':
+            extractor = MobileNetV2(
+                input_shape=x.shape[1:], include_top=False, weights=weights)
+            pick_layer1 = 54  # [80, 80, 32]
+            pick_layer2 = 116  # [40, 40, 96]
+            pick_layer3 = 143  # [20, 20, 160]
+            preprocess = tf.keras.applications.mobilenet_v2.preprocess_input
+        else:
+            raise NotImplementedError(
+                'Backbone type {} is not recognized.'.format(backbone_type))
+        return Model(extractor.input,
+                     (extractor.layers[pick_layer1].output,
+                      extractor.layers[pick_layer2].output,
+                      extractor.layers[pick_layer3].output),
+                     name=backbone_type + '_extrator')(preprocess(x))
+    return backbone
+class ConvUnit(tf.keras.layers.Layer):
+    """Conv + BN + Act"""
+    def __init__(self, f, k, s, wd, act=None, **kwargs):
+        super(ConvUnit, self).__init__(**kwargs)
+        self.conv = Conv2D(filters=f, kernel_size=k, strides=s, padding='same',
+                           kernel_initializer=_kernel_init(),
+                           kernel_regularizer=_regularizer(wd),
+                           use_bias=False)
+        self.bn = BatchNormalization()
+        if act is None:
+            self.act_fn = tf.identity
+        elif act == 'relu':
+            self.act_fn = ReLU()
+        elif act == 'lrelu':
+            self.act_fn = LeakyReLU(0.1)
+        else:
+            raise NotImplementedError(
+                'Activation function type {} is not recognized.'.format(act))
+    def call(self, x):
+        return self.act_fn(self.bn(self.conv(x)))
+class FPN(tf.keras.layers.Layer):
+    """Feature Pyramid Network"""
+    def __init__(self, out_ch, wd, **kwargs):
+        super(FPN, self).__init__(**kwargs)
+        act = 'relu'
+        self.out_ch = out_ch
+        self.wd = wd
+        if (out_ch <= 64):
+            act = 'lrelu'
+        self.output1 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=act)
+        self.output2 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=act)
+        self.output3 = ConvUnit(f=out_ch, k=1, s=1, wd=wd, act=act)
+        self.merge1 = ConvUnit(f=out_ch, k=3, s=1, wd=wd, act=act)
+        self.merge2 = ConvUnit(f=out_ch, k=3, s=1, wd=wd, act=act)
+    def call(self, x):
+        output1 = self.output1(x[0])  # [80, 80, out_ch]
+        output2 = self.output2(x[1])  # [40, 40, out_ch]
+        output3 = self.output3(x[2])  # [20, 20, out_ch]
+        up_h, up_w = tf.shape(output2)[1], tf.shape(output2)[2]
+        up3 = tf.image.resize(output3, [up_h, up_w], method='nearest')
+        output2 = output2 + up3
+        output2 = self.merge2(output2)
+        up_h, up_w = tf.shape(output1)[1], tf.shape(output1)[2]
+        up2 = tf.image.resize(output2, [up_h, up_w], method='nearest')
+        output1 = output1 + up2
+        output1 = self.merge1(output1)
+        return output1, output2, output3
+    def get_config(self):
+        config = {
+            'out_ch': self.out_ch,
+            'wd': self.wd,
+        }
+        base_config = super(FPN, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+class SSH(tf.keras.layers.Layer):
+    """Single Stage Headless Layer"""
+    def __init__(self, out_ch, wd, **kwargs):
+        super(SSH, self).__init__(**kwargs)
+        assert out_ch % 4 == 0
+        self.out_ch = out_ch
+        self.wd = wd
+        act = 'relu'
+        if (out_ch <= 64):
+            act = 'lrelu'
+        self.conv_3x3 = ConvUnit(f=out_ch // 2, k=3, s=1, wd=wd, act=None)
+        self.conv_5x5_1 = ConvUnit(f=out_ch // 4, k=3, s=1, wd=wd, act=act)
+        self.conv_5x5_2 = ConvUnit(f=out_ch // 4, k=3, s=1, wd=wd, act=None)
+        self.conv_7x7_2 = ConvUnit(f=out_ch // 4, k=3, s=1, wd=wd, act=act)
+        self.conv_7x7_3 = ConvUnit(f=out_ch // 4, k=3, s=1, wd=wd, act=None)
+        self.relu = ReLU()
+    def call(self, x):
+        conv_3x3 = self.conv_3x3(x)
+        conv_5x5_1 = self.conv_5x5_1(x)
+        conv_5x5 = self.conv_5x5_2(conv_5x5_1)
+        conv_7x7_2 = self.conv_7x7_2(conv_5x5_1)
+        conv_7x7 = self.conv_7x7_3(conv_7x7_2)
+        output = tf.concat([conv_3x3, conv_5x5, conv_7x7], axis=3)
+        output = self.relu(output)
+        return output
+    def get_config(self):
+        config = {
+            'out_ch': self.out_ch,
+            'wd': self.wd,
+        }
+        base_config = super(SSH, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+class BboxHead(tf.keras.layers.Layer):
+    """Bbox Head Layer"""
+    def __init__(self, num_anchor, wd, **kwargs):
+        super(BboxHead, self).__init__(**kwargs)
+        self.num_anchor = num_anchor
+        self.wd = wd
+        self.conv = Conv2D(filters=num_anchor * 4, kernel_size=1, strides=1)
+    def call(self, x):
+        h, w = tf.shape(x)[1], tf.shape(x)[2]
+        x = self.conv(x)
+        return tf.reshape(x, [-1, h * w * self.num_anchor, 4])
+    def get_config(self):
+        config = {
+            'num_anchor': self.num_anchor,
+            'wd': self.wd,
+        }
+        base_config = super(BboxHead, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+class LandmarkHead(tf.keras.layers.Layer):
+    """Landmark Head Layer"""
+    def __init__(self, num_anchor, wd, name='LandmarkHead', **kwargs):
+        super(LandmarkHead, self).__init__(name=name, **kwargs)
+        self.num_anchor = num_anchor
+        self.wd = wd
+        self.conv = Conv2D(filters=num_anchor * 10, kernel_size=1, strides=1)
+    def call(self, x):
+        h, w = tf.shape(x)[1], tf.shape(x)[2]
+        x = self.conv(x)
+        return tf.reshape(x, [-1, h * w * self.num_anchor, 10])
+    def get_config(self):
+        config = {
+            'num_anchor': self.num_anchor,
+            'wd': self.wd,
+        }
+        base_config = super(LandmarkHead, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+class ClassHead(tf.keras.layers.Layer):
+    """Class Head Layer"""
+    def __init__(self, num_anchor, wd, name='ClassHead', **kwargs):
+        super(ClassHead, self).__init__(name=name, **kwargs)
+        self.num_anchor = num_anchor
+        self.wd = wd
+        self.conv = Conv2D(filters=num_anchor * 2, kernel_size=1, strides=1)
+    def call(self, x):
+        h, w = tf.shape(x)[1], tf.shape(x)[2]
+        x = self.conv(x)
+        return tf.reshape(x, [-1, h * w * self.num_anchor, 2])
+    def get_config(self):
+        config = {
+            'num_anchor': self.num_anchor,
+            'wd': self.wd,
+        }
+        base_config = super(ClassHead, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+def RetinaFaceModel(cfg, training=False, iou_th=0.4, score_th=0.02,
+                    name='RetinaFaceModel'):
+    """Retina Face Model"""
+    input_size = cfg['input_size'] if training else None
+    wd = cfg['weights_decay']
+    out_ch = cfg['out_channel']
+    num_anchor = len(cfg['min_sizes'][0])
+    backbone_type = cfg['backbone_type']
+    # define model
+    x = inputs = Input([input_size, input_size, 3], name='input_image')
+    x = Backbone(backbone_type=backbone_type)(x)
+    fpn = FPN(out_ch=out_ch, wd=wd)(x)
+    features = [SSH(out_ch=out_ch, wd=wd)(f)
+                for i, f in enumerate(fpn)]
+    bbox_regressions = tf.concat(
+        [BboxHead(num_anchor, wd=wd)(f)
+         for i, f in enumerate(features)], axis=1)
+    landm_regressions = tf.concat(
+        [LandmarkHead(num_anchor, wd=wd, name=f'LandmarkHead_{i}')(f)
+         for i, f in enumerate(features)], axis=1)
+    classifications = tf.concat(
+        [ClassHead(num_anchor, wd=wd, name=f'ClassHead_{i}')(f)
+         for i, f in enumerate(features)], axis=1)
+    classifications = tf.keras.layers.Softmax(axis=-1)(classifications)
+    if training:
+        out = (bbox_regressions, landm_regressions, classifications)
+    else:
+        # only for batch size 1
+        preds = tf.concat(  # [bboxes, landms, landms_valid, conf]
+            [bbox_regressions[0],
+             landm_regressions[0],
+             tf.ones_like(classifications[0, :, 0][..., tf.newaxis]),
+             classifications[0, :, 1][..., tf.newaxis]], 1)
+        priors = prior_box_tf((tf.shape(inputs)[1], tf.shape(inputs)[2]), cfg['min_sizes'], cfg['steps'], cfg['clip'])
+        decode_preds = decode_tf(preds, priors, cfg['variances'])
+        selected_indices = tf.image.non_max_suppression(
+            boxes=decode_preds[:, :4],
+            scores=decode_preds[:, -1],
+            max_output_size=tf.shape(decode_preds)[0],
+            iou_threshold=iou_th,
+            score_threshold=score_th)
+        out = tf.gather(decode_preds, selected_indices)
+    return Model(inputs, out, name=name), Model(inputs, [bbox_regressions, landm_regressions, classifications], name=name + '_bb_only')

retina_model/ops.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from retinaface.anchor import decode_tf, prior_box_tf
+import tensorflow as tf
+def extract_detections(bbox_regressions, landm_regressions, classifications, image_sizes, iou_th=0.4, score_th=0.02):
+    min_sizes = [[16, 32], [64, 128], [256, 512]]
+    steps = [8, 16, 32]
+    variances = [0.1, 0.2]
+    preds = tf.concat(  # [bboxes, landms, landms_valid, conf]
+        [bbox_regressions,
+         landm_regressions,
+         tf.ones_like(classifications[:, 0][..., tf.newaxis]),
+         classifications[:, 1][..., tf.newaxis]], 1)
+    priors = prior_box_tf(image_sizes, min_sizes, steps, False)
+    decode_preds = decode_tf(preds, priors, variances)
+    selected_indices = tf.image.non_max_suppression(
+        boxes=decode_preds[:, :4],
+        scores=decode_preds[:, -1],
+        max_output_size=tf.shape(decode_preds)[0],
+        iou_threshold=iou_th,
+        score_threshold=score_th)
+    out = tf.gather(decode_preds, selected_indices)
+    return out