diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..91a04233555bd3ecd0a0fb8b32b41fa573d01ec2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.psd filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..453dc7c1852259d952755381431e7e2be5d55cb7 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,20 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + + # to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: git push --force https://santit96:$HF_TOKEN@huggingface.co/spaces/rootstrap-org/waste-classifier main diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4526ed90d42e61b98847adedbe767e9488824b2a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +.DS_Store +*.jpg +*.png +*.jpeg diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74a1f7712f8d55a366c21796378d8c603eef6e74 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +--- +title: Waste Classifier +emoji: ♻️ +colorFrom: green +colorTo: gray +sdk: streamlit +sdk_version: 1.25.0 +pinned: false +--- + +Waste Classifier +============================== + +Waste Detection and Classifier tool diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d832da21db24290172878432f4ca669c25db4d --- /dev/null +++ b/app.py @@ -0,0 +1,82 @@ +""" +Streamlit app +""" +import sys + +import streamlit as st + +from constants import (CLAS_FILEPATH, CLAS_THRESHOLD, CLASSES, DET_FILEPATH, + DET_NAME, DET_THRESHOLD, DEVICE, OUTPUT_IMG_FILEPATH) + +sys.path.append("./efficientdet") + +from PIL import Image + +from efficientdet.efficientdet import plot_results +from trash_detector import detect_trash + + +def initial_config(): + """ + Initial configuration of streamlit page + """ + st.set_page_config( + page_title="Waste Classifier", + page_icon="♻️", + ) + + +def render(): + """ + Render the streamlit app + """ + st.title("Waste classifier") + st.markdown("""Classify your waste into different classes""") + + # Image loader and button + uploaded_file = st.file_uploader( + "Upload image with trash", type=["jpg", "jpeg", "png", "gif", "bmp"] + ) + classify_button = st.button("Classify trash") + + if classify_button: + if not uploaded_file: + st.error("Upload an image") + else: + # Create two columns + col1, col2 = st.columns(2) + + # Column 1: Uploaded image + with col1: + st.write("Uploaded image") + st.image( + uploaded_file, caption="Uploaded Image.", use_column_width=True + ) + + # Column 2: Classified image + with col2: + with st.spinner(text="Classifying the trash..."): + img = Image.open(uploaded_file).convert("RGB") + cls_prob, bboxes_final = detect_trash( + img, + DET_NAME, + DET_FILEPATH, + CLAS_FILEPATH, + DEVICE, + DET_THRESHOLD, + CLAS_THRESHOLD, + ) + # plot and save demo image + plot_results( + img, cls_prob, bboxes_final, CLASSES, OUTPUT_IMG_FILEPATH + ) + output_img = Image.open(OUTPUT_IMG_FILEPATH) + st.write("Classified image") + st.image( + output_img, 
caption="Classified Image.", use_column_width=True + ) + + +if __name__ == "__main__": + initial_config() + render() diff --git a/constants.py b/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..ce4a34c7bf90ed1ec019c113cea38c16e918e504 --- /dev/null +++ b/constants.py @@ -0,0 +1,8 @@ +CLAS_FILEPATH = "models/resnet50-classifier.pkl" +DET_FILEPATH = "models/efficientdet-d2-detector.pth.tar" +CLASSES = ["cardboard", "compost", "glass", "metal", "paper", "plastic", "trash"] +DET_NAME = "tf_efficientdet_d2" +CLAS_THRESHOLD = 0.5 +DET_THRESHOLD = 0.17 +DEVICE = "cpu" +OUTPUT_IMG_FILEPATH = "classified_image.jpg" diff --git a/efficientdet/__init__.py b/efficientdet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/efficientdet/effdet/__init__.py b/efficientdet/effdet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2aa4bc6bbb65fb9dd873894862b089aa6a33d18 --- /dev/null +++ b/efficientdet/effdet/__init__.py @@ -0,0 +1,7 @@ +from .efficientdet import EfficientDet +from .bench import DetBenchPredict, DetBenchTrain, unwrap_bench +from .data import create_dataset, create_loader, create_parser, DetectionDatset, SkipSubset +from .evaluator import CocoEvaluator, PascalEvaluator, OpenImagesEvaluator, create_evaluator +from .config import get_efficientdet_config, default_detection_model_configs +from .factory import create_model, create_model_from_config +from .helpers import load_checkpoint, load_pretrained diff --git a/efficientdet/effdet/anchors.py b/efficientdet/effdet/anchors.py new file mode 100644 index 0000000000000000000000000000000000000000..fcafc37491ccb774020b9246b2518034072724b3 --- /dev/null +++ b/efficientdet/effdet/anchors.py @@ -0,0 +1,421 @@ +""" RetinaNet / EfficientDet Anchor Gen + +Adapted for PyTorch from Tensorflow impl at + https://github.com/google/automl/blob/6f6694cec1a48cdb33d5d1551a2d5db8ad227798/efficientdet/anchors.py + +Hacked together by Ross Wightman, original copyright below +""" +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Anchor definition. + +This module is borrowed from TPU RetinaNet implementation: +https://github.com/tensorflow/tpu/blob/master/models/official/retinanet/anchors.py +""" +from typing import Optional, Tuple, Sequence + +import numpy as np +import torch +import torch.nn as nn +#import torchvision.ops.boxes as tvb +from torchvision.ops.boxes import batched_nms, remove_small_boxes +from typing import List + +from effdet.object_detection import ArgMaxMatcher, FasterRcnnBoxCoder, BoxList, IouSimilarity, TargetAssigner +from .soft_nms import batched_soft_nms + + +# The minimum score to consider a logit for identifying detections. 
+MIN_CLASS_SCORE = -5.0 + +# The score for a dummy detection +_DUMMY_DETECTION_SCORE = -1e5 + +# The maximum number of (anchor,class) pairs to keep for non-max suppression. +MAX_DETECTION_POINTS = 5000 + +# The maximum number of detections per image. +MAX_DETECTIONS_PER_IMAGE = 100 + + +def decode_box_outputs(rel_codes, anchors, output_xyxy: bool=False): + """Transforms relative regression coordinates to absolute positions. + + Network predictions are normalized and relative to a given anchor; this + reverses the transformation and outputs absolute coordinates for the input image. + + Args: + rel_codes: box regression targets. + + anchors: anchors on all feature levels. + + Returns: + outputs: bounding boxes. + + """ + ycenter_a = (anchors[:, 0] + anchors[:, 2]) / 2 + xcenter_a = (anchors[:, 1] + anchors[:, 3]) / 2 + ha = anchors[:, 2] - anchors[:, 0] + wa = anchors[:, 3] - anchors[:, 1] + + ty, tx, th, tw = rel_codes.unbind(dim=1) + + w = torch.exp(tw) * wa + h = torch.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. + if output_xyxy: + out = torch.stack([xmin, ymin, xmax, ymax], dim=1) + else: + out = torch.stack([ymin, xmin, ymax, xmax], dim=1) + return out + + +def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor): + boxes = boxes.clamp(min=0) + size = torch.cat([size, size], dim=0) + boxes = boxes.min(size) + return boxes + + +def generate_detections( + cls_outputs, box_outputs, anchor_boxes, indices, classes, + img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor], + max_det_per_image: int = MAX_DETECTIONS_PER_IMAGE, soft_nms: bool = False): + """Generates detections with RetinaNet model outputs and anchors. + + Args: + cls_outputs: a torch tensor with shape [N, 1], which has the highest class + scores on all feature levels. The N is the number of selected + top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) + + box_outputs: a torch tensor with shape [N, 4], which stacks box regression + outputs on all feature levels. The N is the number of selected top-k + total anchors on all levels. (k being MAX_DETECTION_POINTS) + + anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all + feature levels. The N is the number of selected top-k total anchors on all levels. + + indices: a torch tensor with shape [N], which is the indices from top-k selection. + + classes: a torch tensor with shape [N], which represents the class + prediction on all selected anchors from top-k selection. + + img_scale: a float tensor representing the scale between original image + and input image for the detector. It is used to rescale detections for + evaluating with the original groundtruth annotations. + + max_det_per_image: an int constant, added as argument to make torchscript happy + + Returns: + detections: detection results in a tensor with shape [MAX_DETECTION_POINTS, 6], + each row representing [x_min, y_min, x_max, y_max, score, class] + """ + assert box_outputs.shape[-1] == 4 + assert anchor_boxes.shape[-1] == 4 + assert cls_outputs.shape[-1] == 1 + + anchor_boxes = anchor_boxes[indices, :] + + # Appply bounding box regression to anchors, boxes are converted to xyxy + # here since PyTorch NMS expects them in that form. 
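    # A worked example of the decode with hypothetical numbers: an anchor
    # [y0, x0, y1, x1] = [0, 0, 100, 100] has center (50, 50) and size 100x100.
    # A regression output [ty, tx, th, tw] = [0.1, 0.0, 0.0, 0.0] moves the
    # center to (60, 50) and leaves the size unchanged (exp(0) = 1), giving
    # [10, 0, 110, 100] in yxyx, i.e. [0, 10, 100, 110] once reordered to xyxy.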
+ boxes = decode_box_outputs(box_outputs.float(), anchor_boxes, output_xyxy=True) + if img_scale is not None and img_size is not None: + boxes = clip_boxes_xyxy(boxes, img_size / img_scale) # clip before NMS better? + + scores = cls_outputs.sigmoid().squeeze(1).float() + if soft_nms: + top_detection_idx, soft_scores = batched_soft_nms( + boxes, scores, classes, method_gaussian=True, iou_threshold=0.3, score_threshold=.001) + scores[top_detection_idx] = soft_scores + else: + top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5) + + # keep only topk scoring predictions + top_detection_idx = top_detection_idx[:max_det_per_image] + boxes = boxes[top_detection_idx] + scores = scores[top_detection_idx, None] + classes = classes[top_detection_idx, None] + 1 # back to class idx with background class = 0 + + if img_scale is not None: + boxes = boxes * img_scale + + # FIXME add option to convert boxes back to yxyx? Otherwise must be handled downstream if + # that is the preferred output format. + + # stack em and pad out to MAX_DETECTIONS_PER_IMAGE if necessary + num_det = len(top_detection_idx) + detections = torch.cat([boxes, scores, classes.float()], dim=1) + if num_det < max_det_per_image: + detections = torch.cat([ + detections, + torch.zeros((max_det_per_image - num_det, 6), device=detections.device, dtype=detections.dtype) + ], dim=0) + return detections + + +def get_feat_sizes(image_size: Tuple[int, int], max_level: int): + """Get feat widths and heights for all levels. + Args: + image_size: a tuple (H, W) + max_level: maximum feature level. + Returns: + feat_sizes: a list of tuples (height, width) for each level. + """ + feat_size = image_size + feat_sizes = [feat_size] + for _ in range(1, max_level + 1): + feat_size = ((feat_size[0] - 1) // 2 + 1, (feat_size[1] - 1) // 2 + 1) + feat_sizes.append(feat_size) + return feat_sizes + + +class Anchors(nn.Module): + """RetinaNet Anchors class.""" + + def __init__(self, min_level, max_level, num_scales, aspect_ratios, anchor_scale, image_size: Tuple[int, int]): + """Constructs multiscale RetinaNet anchors. + + Args: + min_level: integer number of minimum level of the output feature pyramid. + + max_level: integer number of maximum level of the output feature pyramid. + + num_scales: integer number representing intermediate scales added + on each level. For instances, num_scales=2 adds two additional + anchor scales [2^0, 2^0.5] on each level. + + aspect_ratios: list of tuples representing the aspect ratio anchors added + on each level. For instances, aspect_ratios = + [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level. + + anchor_scale: float number representing the scale of size of the base + anchor to the feature stride 2^level. + + image_size: Sequence specifying input image size of model (H, W). + The image_size should be divided by the largest feature stride 2^max_level. 
+ """ + super(Anchors, self).__init__() + self.min_level = min_level + self.max_level = max_level + self.num_scales = num_scales + self.aspect_ratios = aspect_ratios + if isinstance(anchor_scale, Sequence): + assert len(anchor_scale) == max_level - min_level + 1 + self.anchor_scales = anchor_scale + else: + self.anchor_scales = [anchor_scale] * (max_level - min_level + 1) + + assert isinstance(image_size, Sequence) and len(image_size) == 2 + # FIXME this restriction can likely be relaxed with some additional changes + assert image_size[0] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)' + assert image_size[1] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)' + self.image_size = tuple(image_size) + self.feat_sizes = get_feat_sizes(image_size, max_level) + self.config = self._generate_configs() + self.register_buffer('boxes', self._generate_boxes()) + + @classmethod + def from_config(cls, config): + return cls( + config.min_level, config.max_level, + config.num_scales, config.aspect_ratios, + config.anchor_scale, config.image_size) + + def _generate_configs(self): + """Generate configurations of anchor boxes.""" + anchor_configs = {} + feat_sizes = self.feat_sizes + for level in range(self.min_level, self.max_level + 1): + anchor_configs[level] = [] + for scale_octave in range(self.num_scales): + for aspect in self.aspect_ratios: + anchor_configs[level].append( + ((feat_sizes[0][0] // feat_sizes[level][0], + feat_sizes[0][1] // feat_sizes[level][1]), + scale_octave / float(self.num_scales), aspect, + self.anchor_scales[level - self.min_level])) + return anchor_configs + + def _generate_boxes(self): + """Generates multiscale anchor boxes.""" + boxes_all = [] + for _, configs in self.config.items(): + boxes_level = [] + for config in configs: + stride, octave_scale, aspect, anchor_scale = config + base_anchor_size_x = anchor_scale * stride[1] * 2 ** octave_scale + base_anchor_size_y = anchor_scale * stride[0] * 2 ** octave_scale + if isinstance(aspect, Sequence): + aspect_x = aspect[0] + aspect_y = aspect[1] + else: + aspect_x = np.sqrt(aspect) + aspect_y = 1.0 / aspect_x + anchor_size_x_2 = base_anchor_size_x * aspect_x / 2.0 + anchor_size_y_2 = base_anchor_size_y * aspect_y / 2.0 + + x = np.arange(stride[1] / 2, self.image_size[1], stride[1]) + y = np.arange(stride[0] / 2, self.image_size[0], stride[0]) + xv, yv = np.meshgrid(x, y) + xv = xv.reshape(-1) + yv = yv.reshape(-1) + + boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2, + yv + anchor_size_y_2, xv + anchor_size_x_2)) + boxes = np.swapaxes(boxes, 0, 1) + boxes_level.append(np.expand_dims(boxes, axis=1)) + + # concat anchors on the same level to the reshape NxAx4 + boxes_level = np.concatenate(boxes_level, axis=1) + boxes_all.append(boxes_level.reshape([-1, 4])) + + anchor_boxes = np.vstack(boxes_all) + anchor_boxes = torch.from_numpy(anchor_boxes).float() + return anchor_boxes + + def get_anchors_per_location(self): + return self.num_scales * len(self.aspect_ratios) + + +class AnchorLabeler(object): + """Labeler for multiscale anchor boxes. + """ + + def __init__(self, anchors, num_classes: int, match_threshold: float = 0.5): + """Constructs anchor labeler to assign labels to anchors. + + Args: + anchors: an instance of class Anchors. + + num_classes: integer number representing number of classes in the dataset. + + match_threshold: float number between 0 and 1 representing the threshold + to assign positive labels for anchors. 
+ """ + similarity_calc = IouSimilarity() + matcher = ArgMaxMatcher( + match_threshold, + unmatched_threshold=match_threshold, + negatives_lower_than_unmatched=True, + force_match_for_each_row=True) + box_coder = FasterRcnnBoxCoder() + + self.target_assigner = TargetAssigner(similarity_calc, matcher, box_coder) + self.anchors = anchors + self.match_threshold = match_threshold + self.num_classes = num_classes + self.indices_cache = {} + + def label_anchors(self, gt_boxes, gt_classes, filter_valid=True): + """Labels anchors with ground truth inputs. + + Args: + gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + + gt_classes: A integer tensor with shape [N, 1] representing groundtruth classes. + + filter_valid: Filter out any boxes w/ gt class <= -1 before assigning + + Returns: + cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. + The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l + represent the dimension of class logits at l-th level. + + box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. + The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and + width_l represent the dimension of bounding box regression output at l-th level. + + num_positives: scalar tensor storing number of positives in an image. + """ + cls_targets_out = [] + box_targets_out = [] + + if filter_valid: + valid_idx = gt_classes > -1 # filter gt targets w/ label <= -1 + gt_boxes = gt_boxes[valid_idx] + gt_classes = gt_classes[valid_idx] + + cls_targets, box_targets, matches = self.target_assigner.assign( + BoxList(self.anchors.boxes), BoxList(gt_boxes), gt_classes) + + # class labels start from 1 and the background class = -1 + cls_targets = (cls_targets - 1).long() + + # Unpack labels. + """Unpacks an array of cls/box into multiple scales.""" + count = 0 + for level in range(self.anchors.min_level, self.anchors.max_level + 1): + feat_size = self.anchors.feat_sizes[level] + steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location() + cls_targets_out.append(cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + box_targets_out.append(box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + count += steps + + num_positives = (matches.match_results > -1).float().sum() + + return cls_targets_out, box_targets_out, num_positives + + def batch_label_anchors(self, gt_boxes, gt_classes, filter_valid=True): + batch_size = len(gt_boxes) + assert batch_size == len(gt_classes) + num_levels = self.anchors.max_level - self.anchors.min_level + 1 + cls_targets_out = [[] for _ in range(num_levels)] + box_targets_out = [[] for _ in range(num_levels)] + num_positives_out = [] + + anchor_box_list = BoxList(self.anchors.boxes) + for i in range(batch_size): + last_sample = i == batch_size - 1 + + if filter_valid: + valid_idx = gt_classes[i] > -1 # filter gt targets w/ label <= -1 + gt_box_list = BoxList(gt_boxes[i][valid_idx]) + gt_class_i = gt_classes[i][valid_idx] + else: + gt_box_list = BoxList(gt_boxes[i]) + gt_class_i = gt_classes[i] + cls_targets, box_targets, matches = self.target_assigner.assign(anchor_box_list, gt_box_list, gt_class_i) + + # class labels start from 1 and the background class = -1 + cls_targets = (cls_targets - 1).long() + + # Unpack labels. 
+ """Unpacks an array of cls/box into multiple scales.""" + count = 0 + for level in range(self.anchors.min_level, self.anchors.max_level + 1): + level_idx = level - self.anchors.min_level + feat_size = self.anchors.feat_sizes[level] + steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location() + cls_targets_out[level_idx].append( + cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + box_targets_out[level_idx].append( + box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + count += steps + if last_sample: + cls_targets_out[level_idx] = torch.stack(cls_targets_out[level_idx]) + box_targets_out[level_idx] = torch.stack(box_targets_out[level_idx]) + + num_positives_out.append((matches.match_results > -1).float().sum()) + if last_sample: + num_positives_out = torch.stack(num_positives_out) + + return cls_targets_out, box_targets_out, num_positives_out + diff --git a/efficientdet/effdet/bench.py b/efficientdet/effdet/bench.py new file mode 100644 index 0000000000000000000000000000000000000000..16c1e98a7126f1886a377fa3bac7e9f6be617919 --- /dev/null +++ b/efficientdet/effdet/bench.py @@ -0,0 +1,143 @@ +""" PyTorch EfficientDet support benches + +Hacked together by Ross Wightman +""" +from typing import Optional, Dict, List +import torch +import torch.nn as nn +from timm.utils import ModelEma +from .anchors import Anchors, AnchorLabeler, generate_detections, MAX_DETECTION_POINTS +from .loss import DetectionLoss + + +def _post_process( + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + num_levels: int, + num_classes: int, + max_detection_points: int = MAX_DETECTION_POINTS, +): + """Selects top-k predictions. + + Post-proc code adapted from Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet + and optimized for PyTorch. + + Args: + cls_outputs: an OrderDict with keys representing levels and values + representing logits in [batch_size, height, width, num_anchors]. + + box_outputs: an OrderDict with keys representing levels and values + representing box regression targets in [batch_size, height, width, num_anchors * 4]. 
+ + num_levels (int): number of feature levels + + num_classes (int): number of output classes + """ + batch_size = cls_outputs[0].shape[0] + cls_outputs_all = torch.cat([ + cls_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, num_classes]) + for level in range(num_levels)], 1) + + box_outputs_all = torch.cat([ + box_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, 4]) + for level in range(num_levels)], 1) + + _, cls_topk_indices_all = torch.topk(cls_outputs_all.reshape(batch_size, -1), dim=1, k=max_detection_points) + indices_all = cls_topk_indices_all // num_classes + classes_all = cls_topk_indices_all % num_classes + + box_outputs_all_after_topk = torch.gather( + box_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, 4)) + + cls_outputs_all_after_topk = torch.gather( + cls_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, num_classes)) + cls_outputs_all_after_topk = torch.gather( + cls_outputs_all_after_topk, 2, classes_all.unsqueeze(2)) + + return cls_outputs_all_after_topk, box_outputs_all_after_topk, indices_all, classes_all + + +@torch.jit.script +def _batch_detection( + batch_size: int, class_out, box_out, anchor_boxes, indices, classes, + img_scale: Optional[torch.Tensor] = None, img_size: Optional[torch.Tensor] = None): + batch_detections = [] + # FIXME we may be able to do this as a batch with some tensor reshaping/indexing, PR welcome + for i in range(batch_size): + img_scale_i = None if img_scale is None else img_scale[i] + img_size_i = None if img_size is None else img_size[i] + detections = generate_detections( + class_out[i], box_out[i], anchor_boxes, indices[i], classes[i], img_scale_i, img_size_i) + batch_detections.append(detections) + return torch.stack(batch_detections, dim=0) + + +class DetBenchPredict(nn.Module): + def __init__(self, model): + super(DetBenchPredict, self).__init__() + self.model = model + self.config = model.config # FIXME remove this when we can use @property (torchscript limitation) + self.num_levels = model.config.num_levels + self.num_classes = model.config.num_classes + self.anchors = Anchors.from_config(model.config) + + def forward(self, x, img_info: Optional[Dict[str, torch.Tensor]] = None): + class_out, box_out = self.model(x) + class_out, box_out, indices, classes = _post_process( + class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes) + if img_info is None: + img_scale, img_size = None, None + else: + img_scale, img_size = img_info['img_scale'], img_info['img_size'] + return _batch_detection( + x.shape[0], class_out, box_out, self.anchors.boxes, indices, classes, img_scale, img_size) + + +class DetBenchTrain(nn.Module): + def __init__(self, model, create_labeler=True): + super(DetBenchTrain, self).__init__() + self.model = model + self.config = model.config # FIXME remove this when we can use @property (torchscript limitation) + self.num_levels = model.config.num_levels + self.num_classes = model.config.num_classes + self.anchors = Anchors.from_config(model.config) + self.anchor_labeler = None + if create_labeler: + self.anchor_labeler = AnchorLabeler(self.anchors, self.num_classes, match_threshold=0.5) + self.loss_fn = DetectionLoss(model.config) + + def forward(self, x, target: Dict[str, torch.Tensor]): + class_out, box_out = self.model(x) + if self.anchor_labeler is None: + # target should contain pre-computed anchor labels if labeler not present in bench + assert 'label_num_positives' in target + cls_targets = [target[f'label_cls_{l}'] for l in range(self.num_levels)] + 
box_targets = [target[f'label_bbox_{l}'] for l in range(self.num_levels)] + num_positives = target['label_num_positives'] + else: + cls_targets, box_targets, num_positives = self.anchor_labeler.batch_label_anchors( + target['bbox'], target['cls']) + + loss, class_loss, box_loss = self.loss_fn(class_out, box_out, cls_targets, box_targets, num_positives) + output = {'loss': loss, 'class_loss': class_loss, 'box_loss': box_loss} + if not self.training: + # if eval mode, output detections for evaluation + class_out_pp, box_out_pp, indices, classes = _post_process( + class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes) + output['detections'] = _batch_detection( + x.shape[0], class_out_pp, box_out_pp, self.anchors.boxes, indices, classes, + target['img_scale'], target['img_size']) + return output + + +def unwrap_bench(model): + # Unwrap a model in support bench so that various other fns can access the weights and attribs of the + # underlying model directly + if isinstance(model, ModelEma): # unwrap ModelEma + return unwrap_bench(model.ema) + elif hasattr(model, 'module'): # unwrap DDP + return unwrap_bench(model.module) + elif hasattr(model, 'model'): # unwrap Bench -> model + return unwrap_bench(model.model) + else: + return model diff --git a/efficientdet/effdet/config/__init__.py b/efficientdet/effdet/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0dd74ec181acca5c0aa089fc573e3c43690ad64b --- /dev/null +++ b/efficientdet/effdet/config/__init__.py @@ -0,0 +1,4 @@ +from .config_utils import set_config_readonly, set_config_writeable +from .fpn_config import get_fpn_config +from .model_config import get_efficientdet_config, default_detection_model_configs +from .train_config import default_detection_train_config diff --git a/efficientdet/effdet/config/config_utils.py b/efficientdet/effdet/config/config_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f367cccab787691881e05f6e42354443db6d1874 --- /dev/null +++ b/efficientdet/effdet/config/config_utils.py @@ -0,0 +1,9 @@ +from omegaconf import OmegaConf + + +def set_config_readonly(conf): + OmegaConf.set_readonly(conf, True) + + +def set_config_writeable(conf): + OmegaConf.set_readonly(conf, False) diff --git a/efficientdet/effdet/config/fpn_config.py b/efficientdet/effdet/config/fpn_config.py new file mode 100644 index 0000000000000000000000000000000000000000..e12ed18923632a713fb478fe97ebc75f1e370124 --- /dev/null +++ b/efficientdet/effdet/config/fpn_config.py @@ -0,0 +1,184 @@ +import itertools + +from omegaconf import OmegaConf + + +def bifpn_config(min_level, max_level, weight_method=None): + """BiFPN config. + Adapted from https://github.com/google/automl/blob/56815c9986ffd4b508fe1d68508e268d129715c1/efficientdet/keras/fpn_configs.py + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + + level_last_id = lambda level: node_ids[level][-1] + level_all_ids = lambda level: node_ids[level] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level - 1, min_level - 1, -1): + # top-down path. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + for i in range(min_level + 1, max_level + 1): + # bottom-up path. 
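        # On the way back up, each new node fuses every node already produced at
        # this level (level_all_ids) with the newest node one level below
        # (level_last_id(i - 1)); 'weight_method' controls how those inputs are
        # combined (plain sum, softmax attention, or fast normalized attention).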
+ p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + return p + + +def panfpn_config(min_level, max_level, weight_method=None): + """PAN FPN config. + + This defines FPN layout from Path Aggregation Networks as an alternate to + BiFPN, it does not implement the full PAN spec. + + Paper: https://arxiv.org/abs/1803.01534 + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + level_last_id = lambda level: node_ids[level][-1] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level, min_level - 1, -1): + # top-down path. + offsets = [level_last_id(i), level_last_id(i + 1)] if i != max_level else [level_last_id(i)] + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': offsets, + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + for i in range(min_level, max_level + 1): + # bottom-up path. + offsets = [level_last_id(i), level_last_id(i - 1)] if i != min_level else [level_last_id(i)] + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': offsets, + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + return p + + +def qufpn_config(min_level, max_level, weight_method=None): + """A dynamic quad fpn config that can adapt to different min/max levels. + + It extends the idea of BiFPN, and has four paths: + (up_down -> bottom_up) + (bottom_up -> up_down). + + Paper: https://ieeexplore.ieee.org/document/9225379 + Ref code: From contribution to TF EfficientDet + https://github.com/google/automl/blob/eb74c6739382e9444817d2ad97c4582dbe9a9020/efficientdet/keras/fpn_configs.py + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + quad_method = 'fastattn' + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + level_last_id = lambda level: node_ids[level][-1] + level_all_ids = lambda level: node_ids[level] + level_first_id = lambda level: node_ids[level][0] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level - 1, min_level - 1, -1): + # top-down path 1. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[max_level].append(node_ids[max_level][-1]) + + for i in range(min_level + 1, max_level): + # bottom-up path 2. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + + i = max_level + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_first_id(i)] + [level_last_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[min_level].append(node_ids[min_level][-1]) + + for i in range(min_level + 1, max_level + 1, 1): + # bottom-up path 3. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [ + level_first_id(i), level_last_id(i - 1) if i != min_level + 1 else level_first_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[min_level].append(node_ids[min_level][-1]) + + for i in range(max_level - 1, min_level, -1): + # top-down path 4. 
+ p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][0]] + [node_ids[i][-1]] + [level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + i = min_level + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][0]] + [level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[max_level].append(node_ids[max_level][-1]) + + # NOTE: the order of the quad path is reversed from the original, my code expects the output of + # each FPN repeat to be same as input from backbone, in order of increasing reductions + for i in range(min_level, max_level + 1): + # quad-add path. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][2], node_ids[i][4]], + 'weight_method': quad_method + }) + node_ids[i].append(next(id_cnt)) + + return p + + +def get_fpn_config(fpn_name, min_level=3, max_level=7): + if not fpn_name: + fpn_name = 'bifpn_fa' + name_to_config = { + 'bifpn_sum': bifpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'bifpn_attn': bifpn_config(min_level=min_level, max_level=max_level, weight_method='attn'), + 'bifpn_fa': bifpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + 'pan_sum': panfpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'pan_fa': panfpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + 'qufpn_sum': qufpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'qufpn_fa': qufpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + } + return name_to_config[fpn_name] diff --git a/efficientdet/effdet/config/model_config.py b/efficientdet/effdet/config/model_config.py new file mode 100644 index 0000000000000000000000000000000000000000..c92ac142818ce69d1533842d8f40c85e2f6421aa --- /dev/null +++ b/efficientdet/effdet/config/model_config.py @@ -0,0 +1,538 @@ +"""EfficientDet Configurations + +Adapted from official impl at https://github.com/google/automl/tree/master/efficientdet + +TODO use a different config system (OmegaConfig -> Hydra?), separate model from train specific hparams +""" + +from omegaconf import OmegaConf +from copy import deepcopy + + +def default_detection_model_configs(): + """Returns a default detection configs.""" + h = OmegaConf.create() + + # model name. + h.name = 'tf_efficientdet_d1' + + h.backbone_name = 'tf_efficientnet_b1' + h.backbone_args = None # FIXME sort out kwargs vs config for backbone creation + + # model specific, input preprocessing parameters + h.image_size = (640, 640) + + # dataset specific head parameters + h.num_classes = 90 + + # feature + anchor config + h.min_level = 3 + h.max_level = 7 + h.num_levels = h.max_level - h.min_level + 1 + h.num_scales = 3 + h.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] + # ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset. 
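    # Aspect ratios may be given either as (w, h) pairs, as above, or as plain
    # w/h scalars (see the commented-out alternative below); the anchor generator
    # accepts both forms and takes sqrt(ratio) to derive the per-axis scaling.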
+ #h.aspect_ratios = [1.0, 2.0, 0.5] + h.anchor_scale = 4.0 + + # FPN and head config + h.pad_type = 'same' # original TF models require an equivalent of Tensorflow 'SAME' padding + h.act_type = 'swish' + h.norm_layer = None # defaults to batch norm when None + h.norm_kwargs = dict(eps=.001, momentum=.01) + h.box_class_repeats = 3 + h.fpn_cell_repeats = 3 + h.fpn_channels = 88 + h.separable_conv = True + h.apply_bn_for_resampling = True + h.conv_after_downsample = False + h.conv_bn_relu_pattern = False + h.use_native_resize_op = False + h.pooling_type = None + h.redundant_bias = True # original TF models have back to back bias + BN layers, not necessary! + h.head_bn_level_first = False # change order of BN in head repeat list of lists, True for torchscript compat + + h.fpn_name = None + h.fpn_config = None + h.fpn_drop_path_rate = 0. # No stochastic depth in default. NOTE not currently used, unstable training + + # classification loss (used by train bench) + h.alpha = 0.25 + h.gamma = 1.5 + h.label_smoothing = 0. # only supported if new_focal == True + h.new_focal = False # use new focal loss (supports label smoothing but uses more mem, less optimal w/ jit script) + h.jit_loss = False # torchscript jit for loss fn speed improvement, can impact stability and/or increase mem usage + + # localization loss (used by train bench) + h.delta = 0.1 + h.box_loss_weight = 50.0 + + return h + + +efficientdet_model_param_dict = dict( + # Models with PyTorch friendly padding and my PyTorch pretrained backbones, training TBD + efficientdet_d0=dict( + name='efficientdet_d0', + backbone_name='efficientnet_b0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + backbone_args=dict(drop_path_rate=0.1), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d0-f3276ba8.pth', + ), + efficientdet_d1=dict( + name='efficientdet_d1', + backbone_name='efficientnet_b1', + image_size=(640, 640), + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/efficientdet_d1-bb7e98fe.pth', + ), + efficientdet_d2=dict( + name='efficientdet_d2', + backbone_name='efficientnet_b2', + image_size=(768, 768), + fpn_channels=112, + fpn_cell_repeats=5, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + backbone_args=dict(drop_path_rate=0.2), + url='', # no pretrained weights yet + ), + efficientdet_d3=dict( + name='efficientdet_d3', + backbone_name='efficientnet_b3', + image_size=(896, 896), + fpn_channels=160, + fpn_cell_repeats=6, + box_class_repeats=4, + pad_type='', + redundant_bias=False, + backbone_args=dict(drop_path_rate=0.2), + url='', # no pretrained weights yet + ), + efficientdet_d4=dict( + name='efficientdet_d4', + backbone_name='efficientnet_b4', + image_size=(1024, 1024), + fpn_channels=224, + fpn_cell_repeats=7, + box_class_repeats=4, + backbone_args=dict(drop_path_rate=0.2), + ), + efficientdet_d5=dict( + name='efficientdet_d5', + backbone_name='efficientnet_b5', + image_size=(1280, 1280), + fpn_channels=288, + fpn_cell_repeats=7, + box_class_repeats=4, + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + + # My own experimental configs with alternate models, training TBD + # Note: any 'timm' model in the EfficientDet family can be used as a backbone here. 
+ resdet50=dict( + name='resdet50', + backbone_name='resnet50', + image_size=(640, 640), + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + act_type='relu', + redundant_bias=False, + separable_conv=False, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/resdet50_416-08676892.pth', + ), + cspresdet50=dict( + name='cspresdet50', + backbone_name='cspresnet50', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + act_type='leaky_relu', + redundant_bias=False, + separable_conv=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + cspresdext50=dict( + name='cspresdext50', + backbone_name='cspresnext50', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + act_type='leaky_relu', + redundant_bias=False, + separable_conv=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + cspresdext50pan=dict( + name='cspresdext50pan', + backbone_name='cspresnext50', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=88, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + act_type='leaky_relu', + fpn_name='pan_fa', # PAN FPN experiment + redundant_bias=False, + separable_conv=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + cspdarkdet53=dict( + name='cspdarkdet53', + backbone_name='cspdarknet53', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + act_type='leaky_relu', + redundant_bias=False, + separable_conv=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + mixdet_m=dict( + name='mixdet_m', + backbone_name='mixnet_m', + image_size=(512, 512), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.1), + url='', # no pretrained weights yet + ), + mixdet_l=dict( + name='mixdet_l', + backbone_name='mixnet_l', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.2), + url='', # no pretrained weights yet + ), + mobiledetv2_110d=dict( + name='mobiledetv2_110d', + backbone_name='mobilenetv2_110d', + image_size=(384, 384), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=48, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + act_type='relu6', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.05), + url='', # no pretrained weights yet + ), + mobiledetv2_120d=dict( + name='mobiledetv2_120d', + backbone_name='mobilenetv2_120d', + image_size=(512, 512), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=56, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + act_type='relu6', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.1), + url='', # no pretrained weights yet + ), + mobiledetv3_large=dict( + name='mobiledetv3_large', + backbone_name='mobilenetv3_large_100', + image_size=(512, 512), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=64, + fpn_cell_repeats=3, 
+ box_class_repeats=3, + pad_type='', + act_type='hard_swish', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.1), + url='', # no pretrained weights yet + ), + efficientdet_q0=dict( + name='efficientdet_q0', + backbone_name='efficientnet_b0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + fpn_name='qufpn_fa', # quad-fpn + fast attn experiment + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.1), + url='', + ), + efficientdet_w0=dict( + name='efficientdet_w0', # 'wide' + backbone_name='efficientnet_b0', + image_size=(512, 512), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=80, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict( + drop_path_rate=0.1, + feature_location='depthwise'), # features from after DW/SE in IR block + url='', # no pretrained weights yet + ), + efficientdet_es=dict( + name='efficientdet_es', #EdgeTPU-Small + backbone_name='efficientnet_es', + image_size=(512, 512), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=72, + fpn_cell_repeats=3, + box_class_repeats=3, + pad_type='', + act_type='relu', + redundant_bias=False, + head_bn_level_first=True, + separable_conv=False, + backbone_args=dict(drop_path_rate=0.1), + url='', + ), + efficientdet_em=dict( + name='efficientdet_em', # Edge-TPU Medium + backbone_name='efficientnet_em', + image_size=(640, 640), + aspect_ratios=[1.0, 2.0, 0.5], + fpn_channels=96, + fpn_cell_repeats=4, + box_class_repeats=3, + pad_type='', + act_type='relu', + redundant_bias=False, + head_bn_level_first=True, + separable_conv=False, + backbone_args=dict(drop_path_rate=0.2), + url='', # no pretrained weights yet + ), + efficientdet_lite0=dict( + name='efficientdet_lite0', + backbone_name='efficientnet_lite0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + act_type='relu', + redundant_bias=False, + head_bn_level_first=True, + backbone_args=dict(drop_path_rate=0.1), + url='', + ), + + # Models ported from Tensorflow with pretrained backbones ported from Tensorflow + tf_efficientdet_d0=dict( + name='tf_efficientdet_d0', + backbone_name='tf_efficientnet_b0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d0_34-f153e0cf.pth', + ), + tf_efficientdet_d1=dict( + name='tf_efficientdet_d1', + backbone_name='tf_efficientnet_b1', + image_size=(640, 640), + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d1_40-a30f94af.pth' + ), + tf_efficientdet_d2=dict( + name='tf_efficientdet_d2', + backbone_name='tf_efficientnet_b2', + image_size=(768, 768), + fpn_channels=112, + fpn_cell_repeats=5, + box_class_repeats=3, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d2_43-8107aa99.pth', + ), + tf_efficientdet_d3=dict( + name='tf_efficientdet_d3', + backbone_name='tf_efficientnet_b3', + image_size=(896, 896), + fpn_channels=160, + fpn_cell_repeats=6, + box_class_repeats=4, + backbone_args=dict(drop_path_rate=0.2), + 
url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d3_47-0b525f35.pth', + ), + tf_efficientdet_d4=dict( + name='tf_efficientdet_d4', + backbone_name='tf_efficientnet_b4', + image_size=(1024, 1024), + fpn_channels=224, + fpn_cell_repeats=7, + box_class_repeats=4, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d4_49-f56376d9.pth', + ), + tf_efficientdet_d5=dict( + name='tf_efficientdet_d5', + backbone_name='tf_efficientnet_b5', + image_size=(1280, 1280), + fpn_channels=288, + fpn_cell_repeats=7, + box_class_repeats=4, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d5_51-c79f9be6.pth', + ), + tf_efficientdet_d6=dict( + name='tf_efficientdet_d6', + backbone_name='tf_efficientnet_b6', + image_size=(1280, 1280), + fpn_channels=384, + fpn_cell_repeats=8, + box_class_repeats=5, + fpn_name='bifpn_sum', # Use unweighted sum for training stability. + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d6_52-4eda3773.pth' + ), + tf_efficientdet_d7=dict( + name='tf_efficientdet_d7', + backbone_name='tf_efficientnet_b6', + image_size=(1536, 1536), + fpn_channels=384, + fpn_cell_repeats=8, + box_class_repeats=5, + anchor_scale=5.0, + fpn_name='bifpn_sum', # Use unweighted sum for training stability. + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7_53-6d1d7a95.pth' + ), + tf_efficientdet_d7x=dict( + name='tf_efficientdet_d7x', + backbone_name='tf_efficientnet_b7', + image_size=(1536, 1536), + fpn_channels=384, + fpn_cell_repeats=8, + box_class_repeats=5, + anchor_scale=4.0, + max_level=8, + fpn_name='bifpn_sum', # Use unweighted sum for training stability. 
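        # max_level=8 adds an extra pyramid level (P3-P8) compared with the other
        # D-series models; given the divisibility assert in Anchors, the 1536x1536
        # input must be a multiple of 2**8 = 256, which it is.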
+ backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7x-f390b87c.pth' + ), + + # The lite configs are in TF automl repository but no weights yet and listed as 'not final' + tf_efficientdet_lite0=dict( + name='tf_efficientdet_lite0', + backbone_name='tf_efficientnet_lite0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + act_type='relu', + redundant_bias=False, + backbone_args=dict(drop_path_rate=0.1), + # unlike other tf_ models, this was not ported from tf automl impl, but trained from tf pretrained efficient lite + # weights using this code, will likely replace if/when official det-lite weights are released + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_lite0-f5f303a9.pth', + ), + tf_efficientdet_lite1=dict( + name='tf_efficientdet_lite1', + backbone_name='tf_efficientnet_lite1', + image_size=(640, 640), + fpn_channels=88, + fpn_cell_repeats=4, + box_class_repeats=3, + act_type='relu', + backbone_args=dict(drop_path_rate=0.2), + url='', # no pretrained weights yet + ), + tf_efficientdet_lite2=dict( + name='tf_efficientdet_lite2', + backbone_name='tf_efficientnet_lite2', + image_size=(768, 768), + fpn_channels=112, + fpn_cell_repeats=5, + box_class_repeats=3, + act_type='relu', + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + tf_efficientdet_lite3=dict( + name='tf_efficientdet_lite3', + backbone_name='tf_efficientnet_lite3', + image_size=(896, 896), + fpn_channels=160, + fpn_cell_repeats=6, + box_class_repeats=4, + act_type='relu', + backbone_args=dict(drop_path_rate=0.2), + url='', + ), + tf_efficientdet_lite4=dict( + name='tf_efficientdet_lite4', + backbone_name='tf_efficientnet_lite4', + image_size=(1024, 1024), + fpn_channels=224, + fpn_cell_repeats=7, + box_class_repeats=4, + act_type='relu', + backbone_args=dict(drop_path_rate=0.2), + url='', + ), +) + + +def get_efficientdet_config(model_name='tf_efficientdet_d1'): + """Get the default config for EfficientDet based on model name.""" + h = default_detection_model_configs() + h.update(efficientdet_model_param_dict[model_name]) + h.num_levels = h.max_level - h.min_level + 1 + return deepcopy(h) # may be unnecessary, ensure no references to param dict values diff --git a/efficientdet/effdet/config/train_config.py b/efficientdet/effdet/config/train_config.py new file mode 100644 index 0000000000000000000000000000000000000000..88deab0a5f3046c5e43d3c7ac8bb2269ee606875 --- /dev/null +++ b/efficientdet/effdet/config/train_config.py @@ -0,0 +1,34 @@ +from omegaconf import OmegaConf + + +def default_detection_train_config(): + # FIXME currently using args for train config, will revisit, perhaps move to Hydra + h = OmegaConf.create() + + # dataset + h.skip_crowd_during_training = True + + # augmentation + h.input_rand_hflip = True + h.train_scale_min = 0.1 + h.train_scale_max = 2.0 + h.autoaugment_policy = None + + # optimization + h.momentum = 0.9 + h.learning_rate = 0.08 + h.lr_warmup_init = 0.008 + h.lr_warmup_epoch = 1.0 + h.first_lr_drop_epoch = 200.0 + h.second_lr_drop_epoch = 250.0 + h.clip_gradients_norm = 10.0 + h.num_epochs = 300 + + # regularization l2 loss. 
+ h.weight_decay = 4e-5 + + h.lr_decay_method = 'cosine' + h.moving_average_decay = 0.9998 + h.ckpt_var_scope = None + + return h diff --git a/efficientdet/effdet/data/__init__.py b/efficientdet/effdet/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fcc1ac591550d6f2f25afc3c9de08f28a2c07287 --- /dev/null +++ b/efficientdet/effdet/data/__init__.py @@ -0,0 +1,6 @@ +from .dataset_factory import create_dataset +from .dataset import DetectionDatset, SkipSubset +from .input_config import resolve_input_config +from .loader import create_loader +from .parsers import create_parser +from .transforms import * diff --git a/efficientdet/effdet/data/dataset.py b/efficientdet/effdet/data/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7296a46fd84aabed897718279dd52eb4e0707921 --- /dev/null +++ b/efficientdet/effdet/data/dataset.py @@ -0,0 +1,145 @@ +""" Detection dataset + +Hacked together by Ross Wightman +""" +import torch.utils.data as data +import numpy as np +import albumentations as A +import torch + +from PIL import Image +from .parsers import create_parser + + +class DetectionDatset(data.Dataset): + """`Object Detection Dataset. Use with parsers for COCO, VOC, and OpenImages. + Args: + parser (string, Parser): + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + + """ + + def __init__(self, data_dir, parser=None, parser_kwargs=None, transform=None, transforms=None): + super(DetectionDatset, self).__init__() + parser_kwargs = parser_kwargs or {} + self.data_dir = data_dir + if isinstance(parser, str): + self._parser = create_parser(parser, **parser_kwargs) + else: + assert parser is not None and len(parser.img_ids) + self._parser = parser + self._transform = transform + self._transforms = transforms + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: Tuple (image, annotations (target)). 
+ """ + img_info = self._parser.img_infos[index] + target = dict(img_idx=index, img_size=(img_info['width'], img_info['height'])) + if self._parser.has_labels: + ann = self._parser.get_ann_info(index) + target.update(ann) + img_path = self.data_dir / img_info['file_name'] + img = Image.open(img_path).convert('RGB') + if self.transforms is not None: + img = torch.as_tensor(np.array(img), dtype=torch.uint8) + voc_boxes = [] + for coord in target['bbox']: + xmin = coord[1] + ymin = coord[0] + xmax = coord[3] + ymax = coord[2] + if xmin<1: + xmin = 1 + if ymin<1: + ymin = 1 + if xmax>=img.shape[1]-1: + xmax = img.shape[1]-1 + if ymax>=img.shape[0]-1: + ymax = img.shape[0]-1 + voc_boxes.append([xmin, ymin, xmax, ymax]) + transformed = self.transforms(image=np.array(img), bbox_classes=target['cls'], bboxes=voc_boxes) + img = torch.as_tensor(transformed['image'], dtype=torch.uint8) + target['bbox'] = [] + for coord in transformed['bboxes']: + ymin = int(coord[1]) + xmin = int(coord[0]) + ymax = int(coord[3]) + xmax = int(coord[2]) + target['bbox'].append([ymin, xmin, ymax, xmax]) + target['bbox'] = np.array(target['bbox'], dtype=np.float32) + target['cls'] = np.array(transformed['bbox_classes']) + img = Image.fromarray(np.array(img).astype('uint8'), 'RGB') + target['img_size'] = img.size + + if self.transform is not None: + img, target = self.transform(img, target) + + return img, target + + def __len__(self): + return len(self._parser.img_ids) + + @property + def parser(self): + return self._parser + + @property + def transform(self): + return self._transform + + @transform.setter + def transform(self, t): + self._transform = t + + @property + def transforms(self): + return self._transforms + + @transforms.setter + def transforms(self, t): + self._transforms = t + +class SkipSubset(data.Dataset): + r""" + Subset of a dataset at specified indices. 
+ + Arguments: + dataset (Dataset): The whole Dataset + n (int): skip rate (select every nth) + """ + def __init__(self, dataset, n=2): + self.dataset = dataset + assert n >= 1 + self.indices = np.arange(len(dataset))[::n] + + def __getitem__(self, idx): + return self.dataset[self.indices[idx]] + + def __len__(self): + return len(self.indices) + + @property + def parser(self): + return self.dataset.parser + + @property + def transform(self): + return self.dataset.transform + + @transform.setter + def transform(self, t): + self.dataset.transform = t + + @property + def transforms(self): + return self.dataset.transforms + + @transforms.setter + def transforms(self, t): + self.dataset.transforms = t diff --git a/efficientdet/effdet/data/dataset_config.py b/efficientdet/effdet/data/dataset_config.py new file mode 100644 index 0000000000000000000000000000000000000000..e5f17b4c368384a0bb98cd53fa2e144cf43d54df --- /dev/null +++ b/efficientdet/effdet/data/dataset_config.py @@ -0,0 +1,194 @@ +""" COCO detect-waste dataset configurations + +Updated 2021 Wimlds in Detect Waste in Pomerania +""" +from dataclasses import dataclass +from typing import Dict + + +@dataclass +class CocoCfg: + variant: str = None + parser: str = 'coco' + num_classes: int = 80 + splits: Dict[str, dict] = None + + +@dataclass +class TACOCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 28 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class DetectwasteCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 7 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class BinaryCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 1 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class BinaryMultiCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 1 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class TrashCanCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 8 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class UAVVasteCfg(CocoCfg): + root: str = "" + ann: str = "" + variant: str = '2017' + num_classes: int = 1 + + def add_split(self): + self.splits = { + 'train': {'ann_filename': self.ann+'_train.json', + 'img_dir': self.root, + 'has_labels': True}, + 'val': {'ann_filename': self.ann+'_test.json', + 'img_dir': self.root, + 'has_labels': True} + } + + +@dataclass +class ICRACfg(CocoCfg): + root: str = "" + 
ann: str = ""
+    variant: str = '2017'
+    num_classes: int = 7
+
+    def add_split(self):
+        self.splits = {
+            'train': {'ann_filename': self.ann+'_train.json',
+                      'img_dir': self.root,
+                      'has_labels': True},
+            'val': {'ann_filename': self.ann+'_test.json',
+                    'img_dir': self.root,
+                    'has_labels': True}
+        }
+
+
+@dataclass
+class DrinkWasteCfg(CocoCfg):
+    root: str = ""
+    ann: str = ""
+    variant: str = '2017'
+    num_classes: int = 4
+
+    def add_split(self):
+        self.splits = {
+            'train': {'ann_filename': self.ann+'_train.json',
+                      'img_dir': self.root,
+                      'has_labels': True},
+            'val': {'ann_filename': self.ann+'_test.json',
+                    'img_dir': self.root,
+                    'has_labels': True}
+        }
+
+
+@dataclass
+class MJU_WasteCfg(CocoCfg):
+    root: str = ""
+    ann: str = ""
+    variant: str = '2017'
+    num_classes: int = 1
+
+    def add_split(self):
+        self.splits = {
+            'train': {'ann_filename': self.ann+'_train.json',
+                      'img_dir': self.root,
+                      'has_labels': True},
+            'val': {'ann_filename': self.ann+'_test.json',
+                    'img_dir': self.root,
+                    'has_labels': True}
+        }
+
+
+@dataclass
+class WadeCfg(CocoCfg):
+    root: str = ""
+    ann: str = ""
+    variant: str = '2017'
+    num_classes: int = 1
+
+    def add_split(self):
+        self.splits = {
+            'train': {'ann_filename': self.ann+'_train.json',
+                      'img_dir': self.root,
+                      'has_labels': True},
+            'val': {'ann_filename': self.ann+'_test.json',
+                    'img_dir': self.root,
+                    'has_labels': True}
+        }
diff --git a/efficientdet/effdet/data/dataset_factory.py b/efficientdet/effdet/data/dataset_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..5497e2732637506148928f68e00adaddb79a6c92
--- /dev/null
+++ b/efficientdet/effdet/data/dataset_factory.py
@@ -0,0 +1,85 @@
+""" Dataset factory
+
+Updated 2021 Wimlds in Detect Waste in Pomerania
+"""
+from collections import OrderedDict
+from pathlib import Path
+
+from .dataset_config import *
+from .parsers import *
+from .dataset import DetectionDatset
+from .parsers import create_parser
+
+# list of detect-waste datasets
+waste_datasets_list = ['taco', 'detectwaste', 'binary', 'multi',
+                       'uav', 'mju', 'trashcan', 'wade', 'icra',
+                       'drinkwaste']
+
+
+def create_dataset(name, root, ann, splits=('train', 'val')):
+    if isinstance(splits, str):
+        splits = (splits,)
+    name = name.lower()
+    root = Path(root)
+    dataset_cls = DetectionDatset
+    datasets = OrderedDict()
+    if name.startswith('coco'):
+        if 'coco2014' in name:
+            dataset_cfg = Coco2014Cfg()
+        else:
+            dataset_cfg = Coco2017Cfg()
+        for s in splits:
+            if s not in dataset_cfg.splits:
+                raise RuntimeError(f'{s} split not found in config')
+            split_cfg = dataset_cfg.splits[s]
+            ann_file = root / split_cfg['ann_filename']
+            parser_cfg = CocoParserCfg(
+                ann_filename=ann_file,
+                has_labels=split_cfg['has_labels']
+            )
+            datasets[s] = dataset_cls(
+                data_dir=root / Path(split_cfg['img_dir']),
+                parser=create_parser(dataset_cfg.parser, cfg=parser_cfg),
+            )
+    elif name in waste_datasets_list:
+        if name.startswith('taco'):
+            dataset_cfg = TACOCfg(root=root, ann=ann)
+        elif name.startswith('detectwaste'):
+            dataset_cfg = DetectwasteCfg(root=root, ann=ann)
+        elif name.startswith('binary'):
+            dataset_cfg = BinaryCfg(root=root, ann=ann)
+        elif name.startswith('multi'):
+            dataset_cfg = BinaryMultiCfg(root=root, ann=ann)
+        elif name.startswith('uav'):
+            dataset_cfg = UAVVasteCfg(root=root, ann=ann)
+        elif name.startswith('trashcan'):
+            dataset_cfg = TrashCanCfg(root=root, ann=ann)
+        elif name.startswith('drinkwaste'):
+            dataset_cfg = DrinkWasteCfg(root=root, ann=ann)
+        elif 
name.startswith('mju'): + dataset_cfg = MJU_WasteCfg(root=root, ann=ann) + elif name.startswith('wade'): + dataset_cfg = WadeCfg(root=root, ann=ann) + elif name.startswith('icra'): + dataset_cfg = ICRACfg(root=root, ann=ann) + else: + assert False, f'Unknown dataset parser ({name})' + dataset_cfg.add_split() + for s in splits: + if s not in dataset_cfg.splits: + raise RuntimeError(f'{s} split not found in config') + split_cfg = dataset_cfg.splits[s] + parser_cfg = CocoParserCfg( + ann_filename=split_cfg['ann_filename'], + has_labels=split_cfg['has_labels'] + ) + datasets[s] = dataset_cls( + data_dir=split_cfg['img_dir'], + parser=create_parser(dataset_cfg.parser, cfg=parser_cfg), + ) + else: + assert False, f'Unknown dataset parser ({name})' + + datasets = list(datasets.values()) + return datasets if len(datasets) > 1 else datasets[0] diff --git a/efficientdet/effdet/data/input_config.py b/efficientdet/effdet/data/input_config.py new file mode 100644 index 0000000000000000000000000000000000000000..3d7e96e99161dc442afcb0b239afcb72939d08ea --- /dev/null +++ b/efficientdet/effdet/data/input_config.py @@ -0,0 +1,60 @@ +from .transforms import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD + + +def resolve_input_config(args, model_config=None, model=None): + if not isinstance(args, dict): + args = vars(args) + input_config = {} + if not model_config and model is not None and hasattr(model, 'config'): + model_config = model.config + + # Resolve input/image size + in_chans = 3 + input_size = (in_chans, 512, 512) + + if 'input_size' in model_config: + input_size = tuple(model_config['input_size']) + elif 'image_size' in model_config: + input_size = (in_chans,) + tuple(model_config['image_size']) + assert isinstance(input_size, tuple) and len(input_size) == 3 + input_config['input_size'] = input_size + + # resolve interpolation method + input_config['interpolation'] = 'bicubic' + if 'interpolation' in args and args['interpolation']: + input_config['interpolation'] = args['interpolation'] + elif 'interpolation' in model_config: + input_config['interpolation'] = model_config['interpolation'] + + # resolve dataset + model mean for normalization + input_config['mean'] = IMAGENET_DEFAULT_MEAN + if 'mean' in args and args['mean'] is not None: + mean = tuple(args['mean']) + if len(mean) == 1: + mean = tuple(list(mean) * in_chans) + else: + assert len(mean) == in_chans + input_config['mean'] = mean + elif 'mean' in model_config: + input_config['mean'] = model_config['mean'] + + # resolve dataset + model std deviation for normalization + input_config['std'] = IMAGENET_DEFAULT_STD + if 'std' in args and args['std'] is not None: + std = tuple(args['std']) + if len(std) == 1: + std = tuple(list(std) * in_chans) + else: + assert len(std) == in_chans + input_config['std'] = std + elif 'std' in model_config: + input_config['std'] = model_config['std'] + + # resolve letterbox fill color + input_config['fill_color'] = 'mean' + if 'fill_color' in args and args['fill_color'] is not None: + input_config['fill_color'] = args['fill_color'] + elif 'fill_color' in model_config: + input_config['fill_color'] = model_config['fill_color'] + + return input_config diff --git a/efficientdet/effdet/data/loader.py b/efficientdet/effdet/data/loader.py new file mode 100644 index 0000000000000000000000000000000000000000..9956d0e9090e8d94d57a72ce185924638de7871e --- /dev/null +++ b/efficientdet/effdet/data/loader.py @@ -0,0 +1,226 @@ +""" Object detection loader/collate + +Hacked together by / Copyright 2020 Ross Wightman +""" +import 
torch.utils.data +from .transforms import * +from .transforms_albumentation import get_transform +from .random_erasing import RandomErasing +from effdet.anchors import AnchorLabeler +from timm.data.distributed_sampler import OrderedDistributedSampler +import os + +MAX_NUM_INSTANCES = 100 + + +class DetectionFastCollate: + """ A detection specific, optimized collate function w/ a bit of state. + + Optionally performs anchor labelling. Doing this here offloads some work from the + GPU and the main training process thread and increases the load on the dataloader + threads. + + """ + def __init__( + self, + instance_keys=None, + instance_shapes=None, + instance_fill=-1, + max_instances=MAX_NUM_INSTANCES, + anchor_labeler=None, + ): + instance_keys = instance_keys or {'bbox', 'bbox_ignore', 'cls'} + instance_shapes = instance_shapes or dict( + bbox=(max_instances, 4), bbox_ignore=(max_instances, 4), cls=(max_instances,)) + self.instance_info = {k: dict(fill=instance_fill, shape=instance_shapes[k]) for k in instance_keys} + self.max_instances = max_instances + self.anchor_labeler = anchor_labeler + + def __call__(self, batch): + batch_size = len(batch) + target = dict() + labeler_outputs = dict() + img_tensor = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8) + for i in range(batch_size): + img_tensor[i] += torch.from_numpy(batch[i][0]) + labeler_inputs = {} + for tk, tv in batch[i][1].items(): + instance_info = self.instance_info.get(tk, None) + if instance_info is not None: + # target tensor is associated with a detection instance + tv = torch.from_numpy(tv).to(dtype=torch.float32) + if self.anchor_labeler is None: + if i == 0: + shape = (batch_size,) + instance_info['shape'] + target_tensor = torch.full(shape, instance_info['fill'], dtype=torch.float32) + target[tk] = target_tensor + else: + target_tensor = target[tk] + num_elem = min(tv.shape[0], self.max_instances) + target_tensor[i, 0:num_elem] = tv[0:num_elem] + else: + # no need to pass gt tensors through when labeler in use + if tk in ('bbox', 'cls'): + labeler_inputs[tk] = tv + else: + # target tensor is an image-level annotation / metadata + if i == 0: + # first batch elem, create destination tensors + if isinstance(tv, (tuple, list)): + # per batch elem sequence + shape = (batch_size, len(tv)) + dtype = torch.float32 if isinstance(tv[0], (float, np.floating)) else torch.int32 + else: + # per batch elem scalar + shape = batch_size, + dtype = torch.float32 if isinstance(tv, (float, np.floating)) else torch.int64 + target_tensor = torch.zeros(shape, dtype=dtype) + target[tk] = target_tensor + else: + target_tensor = target[tk] + target_tensor[i] = torch.tensor(tv, dtype=target_tensor.dtype) + + if self.anchor_labeler is not None: + cls_targets, box_targets, num_positives = self.anchor_labeler.label_anchors( + labeler_inputs['bbox'], labeler_inputs['cls'], filter_valid=False) + if i == 0: + # first batch elem, create destination tensors, separate key per level + for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)): + labeler_outputs[f'label_cls_{j}'] = torch.zeros( + (batch_size,) + ct.shape, dtype=torch.int64) + labeler_outputs[f'label_bbox_{j}'] = torch.zeros( + (batch_size,) + bt.shape, dtype=torch.float32) + labeler_outputs['label_num_positives'] = torch.zeros(batch_size) + for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)): + labeler_outputs[f'label_cls_{j}'][i] = ct + labeler_outputs[f'label_bbox_{j}'][i] = bt + labeler_outputs['label_num_positives'][i] = num_positives + if labeler_outputs: + 
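+            # fold the per-level anchor labels built above (label_cls_*, label_bbox_*,
+            # label_num_positives) into the shared target dict before returning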
target.update(labeler_outputs) + + return img_tensor, target + + +class PrefetchLoader: + + def __init__(self, + loader, + mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD, + re_prob=0., + re_mode='pixel', + re_count=1, + ): + self.loader = loader + self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1) + self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1) + if re_prob > 0.: + self.random_erasing = RandomErasing(probability=re_prob, mode=re_mode, max_count=re_count) + else: + self.random_erasing = None + + def __iter__(self): + stream = torch.cuda.Stream() + first = True + + for next_input, next_target in self.loader: + with torch.cuda.stream(stream): + next_input = next_input.cuda(non_blocking=True) + next_input = next_input.float().sub_(self.mean).div_(self.std) + next_target = {k: v.cuda(non_blocking=True) for k, v in next_target.items()} + if self.random_erasing is not None: + next_input = self.random_erasing(next_input, next_target) + + if not first: + yield input, target + else: + first = False + + torch.cuda.current_stream().wait_stream(stream) + input = next_input + target = next_target + + yield input, target + + def __len__(self): + return len(self.loader) + + @property + def sampler(self): + return self.loader.sampler + + @property + def dataset(self): + return self.loader.dataset + + +def create_loader( + dataset, + input_size, + batch_size, + is_training=False, + use_prefetcher=True, + re_prob=0., + re_mode='pixel', + re_count=1, + interpolation='bilinear', + fill_color='mean', + mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD, + num_workers=1, + distributed=False, + pin_mem=False, + anchor_labeler=None, +): + if isinstance(input_size, tuple): + img_size = input_size[-2:] + else: + img_size = input_size + + if is_training: + transforms = get_transform() + transform = transforms_coco_train( + img_size, + interpolation=interpolation, + use_prefetcher=use_prefetcher, + fill_color=fill_color, + mean=mean, + std=std) + else: + transforms = None + transform = transforms_coco_eval( + img_size, + interpolation=interpolation, + use_prefetcher=use_prefetcher, + fill_color=fill_color, + mean=mean, + std=std) + dataset.transforms = transforms + dataset.transform = transform + + sampler = None + if distributed: + if is_training: + sampler = torch.utils.data.distributed.DistributedSampler(dataset) + else: + # This will add extra duplicate entries to result in equal num + # of samples per-process, will slightly alter validation results + sampler = OrderedDistributedSampler(dataset) + + collate_fn = DetectionFastCollate(anchor_labeler=anchor_labeler) + loader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=sampler is None and is_training, + num_workers=num_workers, + sampler=sampler, + pin_memory=pin_mem, + collate_fn=collate_fn, + ) + if use_prefetcher: + if is_training: + loader = PrefetchLoader(loader, mean=mean, std=std, re_prob=re_prob, re_mode=re_mode, re_count=re_count) + else: + loader = PrefetchLoader(loader, mean=mean, std=std) + + return loader diff --git a/efficientdet/effdet/data/parsers/__init__.py b/efficientdet/effdet/data/parsers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bea708cb8cc1c8a9e7a263dabc438211d9f79c2c --- /dev/null +++ b/efficientdet/effdet/data/parsers/__init__.py @@ -0,0 +1,2 @@ +from .parser_config import OpenImagesParserCfg, CocoParserCfg, VocParserCfg +from .parser_factory import create_parser diff --git 
a/efficientdet/effdet/data/parsers/parser.py b/efficientdet/effdet/data/parsers/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..b593280520fb0eae22f9b127908a52da882ce76c --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser.py @@ -0,0 +1,82 @@ +from numbers import Integral +from typing import List, Union, Dict, Any + + +class Parser: + """ Parser base class. + + The attributes listed below make up a public interface common to all parsers. They can be accessed directly + once the dataset is constructed and annotations are populated. + + Attributes: + + cat_names (list[str]): + list of category (class) names, with background class at position 0. + cat_ids (list[union[str, int]): + list of dataset specific, unique integer or string category ids, does not include background + cat_id_to_label (dict): + map from category id to integer 1-indexed class label + + img_ids (list): + list of dataset specific, unique image ids corresponding to valid samples in dataset + img_ids_invalid (list): + list of image ids corresponding to invalid images, not used as samples + img_infos (list[dict]): + image info, list of info dicts with filename, width, height for each image sample + """ + def __init__( + self, + bbox_yxyx: bool = False, + has_labels: bool = True, + include_masks: bool = False, + include_bboxes_ignore: bool = False, + ignore_empty_gt: bool = False, + min_img_size: int = 32, + ): + """ + Args: + yxyx (bool): output coords in yxyx format, otherwise xyxy + has_labels (bool): dataset has labels (for training validation, False usually for test sets) + include_masks (bool): include segmentation masks in target output (not supported yet for any dataset) + include_bboxes_ignore (bool): include ignored bbox in target output + ignore_empty_gt (bool): ignore images with no ground truth (no negative images) + min_img_size (bool): ignore images with width or height smaller than this number + sub_sample (int): sample every N images from the dataset + """ + # parser config, determines how dataset parsed and validated + self.yxyx = bbox_yxyx + self.has_labels = has_labels + self.include_masks = include_masks + self.include_bboxes_ignore = include_bboxes_ignore + self.ignore_empty_gt = ignore_empty_gt + self.min_img_size = min_img_size + self.label_offset = 1 + + # Category (class) metadata. Populated by _load_annotations() + self.cat_names: List[str] = [] + self.cat_ids: List[Union[str, Integral]] = [] + self.cat_id_to_label: Dict[Union[str, Integral], Integral] = dict() + + # Image metadata. 
Populated by _load_annotations() + self.img_ids: List[Union[str, Integral]] = [] + self.img_ids_invalid: List[Union[str, Integral]] = [] + self.img_infos: List[Dict[str, Any]] = [] + + @property + def cat_dicts(self): + """return category names and labels in format compatible with TF Models Evaluator + list[dict(name=, id=)] + """ + return [ + dict( + name=name, + id=cat_id if not self.cat_id_to_label else self.cat_id_to_label[cat_id] + ) for name, cat_id in zip(self.cat_names, self.cat_ids)] + + @property + def max_label(self): + if self.cat_id_to_label: + return max(self.cat_id_to_label.values()) + else: + assert len(self.cat_ids) and isinstance(self.cat_ids[0], Integral) + return max(self.cat_ids) diff --git a/efficientdet/effdet/data/parsers/parser_coco.py b/efficientdet/effdet/data/parsers/parser_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..58bc2495c38c2b6835626528e37a4da119127d73 --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser_coco.py @@ -0,0 +1,93 @@ +""" COCO dataset parser + +Copyright 2020 Ross Wightman +""" +import numpy as np +from pycocotools.coco import COCO +from .parser import Parser +from .parser_config import CocoParserCfg + + +class CocoParser(Parser): + + def __init__(self, cfg: CocoParserCfg): + super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=cfg.include_masks, + include_bboxes_ignore=cfg.include_bboxes_ignore, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.cat_ids_as_labels = True # this is the default for original TF EfficientDet models + self.coco = None + self._load_annotations(cfg.ann_filename) + + def get_ann_info(self, idx): + img_id = self.img_ids[idx] + return self._parse_img_ann(img_id) + + def _load_annotations(self, ann_file): + assert self.coco is None + self.coco = COCO(ann_file) + self.cat_ids = self.coco.getCatIds() + self.cat_names = [c['name'] for c in self.coco.loadCats(ids=self.cat_ids)] + if not self.cat_ids_as_labels: + self.cat_id_to_label = {cat_id: i + self.label_offset for i, cat_id in enumerate(self.cat_ids)} + img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) + for img_id in sorted(self.coco.imgs.keys()): + info = self.coco.loadImgs([img_id])[0] + if (min(info['width'], info['height']) < self.min_img_size or + (self.ignore_empty_gt and img_id not in img_ids_with_ann)): + self.img_ids_invalid.append(img_id) + continue + self.img_ids.append(img_id) + self.img_infos.append(info) + + def _parse_img_ann(self, img_id): + ann_ids = self.coco.getAnnIds(imgIds=[img_id]) + ann_info = self.coco.loadAnns(ann_ids) + bboxes = [] + bboxes_ignore = [] + cls = [] + + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if self.include_masks and ann['area'] <= 0: + continue + if w < 1 or h < 1: + continue + + if self.yxyx: + bbox = [y1, x1, y1 + h, x1 + w] + else: + bbox = [x1, y1, x1 + w, y1 + h] + + if ann.get('iscrowd', False): + if self.include_bboxes_ignore: + bboxes_ignore.append(bbox) + else: + bboxes.append(bbox) + cls.append(self.cat_id_to_label[ann['category_id']] if self.cat_id_to_label else ann['category_id']) + + if bboxes: + bboxes = np.array(bboxes, ndmin=2, dtype=np.float32) + cls = np.array(cls, dtype=np.int64) + else: + bboxes = np.zeros((0, 4), dtype=np.float32) + cls = np.array([], dtype=np.int64) + + if self.include_bboxes_ignore: + if bboxes_ignore: + bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32) + else: 
+ bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict(bbox=bboxes, cls=cls) + + if self.include_bboxes_ignore: + ann['bbox_ignore'] = bboxes_ignore + + return ann diff --git a/efficientdet/effdet/data/parsers/parser_config.py b/efficientdet/effdet/data/parsers/parser_config.py new file mode 100644 index 0000000000000000000000000000000000000000..8537d3e1b176e06a7d391552ae3f4fc602013270 --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser_config.py @@ -0,0 +1,49 @@ +""" Dataset parser configs + +Copyright 2020 Ross Wightman +""" +from dataclasses import dataclass + +__all__ = ['CocoParserCfg', 'OpenImagesParserCfg', 'VocParserCfg'] + + +@dataclass +class CocoParserCfg: + ann_filename: str # absolute path + include_masks: bool = False + include_bboxes_ignore: bool = False + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False + + +@dataclass +class VocParserCfg: + split_filename: str + ann_filename: str + img_filename: str = '%.jpg' + keep_difficult: bool = True + classes: list = None + add_background: bool = True + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False + + +@dataclass +class OpenImagesParserCfg: + categories_filename: str + img_info_filename: str + bbox_filename: str + img_label_filename: str = '' + masks_filename: str = '' + img_filename: str = '%s.jpg' # relative to dataset img_dir + task: str = 'obj' + prefix_levels: int = 1 + add_background: bool = True + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False diff --git a/efficientdet/effdet/data/parsers/parser_factory.py b/efficientdet/effdet/data/parsers/parser_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..9dcd46a740a5114902f28a22c9f186acefb81507 --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser_factory.py @@ -0,0 +1,19 @@ +""" Parser factory + +Copyright 2020 Ross Wightman +""" +from .parser_coco import CocoParser +from .parser_voc import VocParser +from .parser_open_images import OpenImagesParser + + +def create_parser(name, **kwargs): + if name == 'coco': + parser = CocoParser(**kwargs) + elif name == 'voc': + parser = VocParser(**kwargs) + elif name == 'openimages': + parser = OpenImagesParser(**kwargs) + else: + assert False, f'Unknown dataset parser ({name})' + return parser diff --git a/efficientdet/effdet/data/parsers/parser_open_images.py b/efficientdet/effdet/data/parsers/parser_open_images.py new file mode 100644 index 0000000000000000000000000000000000000000..3c201ac2df47924ac648c2de245b9ca807c1fbc0 --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser_open_images.py @@ -0,0 +1,211 @@ +""" OpenImages dataset parser + +Copyright 2020 Ross Wightman +""" +import numpy as np +import os +import logging + +from .parser import Parser +from .parser_config import OpenImagesParserCfg + +_logger = logging.getLogger(__name__) + + +class OpenImagesParser(Parser): + + def __init__(self, cfg: OpenImagesParserCfg): + super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=False, # FIXME to support someday + include_bboxes_ignore=False, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.img_prefix_levels = cfg.prefix_levels + self.mask_prefix_levels = 1 + self._anns = None # access via get_ann_info() + self._img_to_ann = None + self._load_annotations( + categories_filename=cfg.categories_filename, + 
img_info_filename=cfg.img_info_filename, + img_filename=cfg.img_filename, + masks_filename=cfg.masks_filename, + bbox_filename=cfg.bbox_filename + ) + + def _load_annotations( + self, + categories_filename: str, + img_info_filename: str, + img_filename: str, + masks_filename: str, + bbox_filename: str, + ): + import pandas as pd # For now, blow up on pandas req only when trying to load open images anno + + _logger.info('Loading categories...') + classes_df = pd.read_csv(categories_filename, header=None) + self.cat_ids = classes_df[0].tolist() + self.cat_names = classes_df[1].tolist() + self.cat_id_to_label = {c: i + self.label_offset for i, c in enumerate(self.cat_ids)} + + def _img_filename(img_id): + # build image filenames that are relative to img_dir + filename = img_filename % img_id + if self.img_prefix_levels: + levels = [c for c in img_id[:self.img_prefix_levels]] + filename = os.path.join(*levels, filename) + return filename + + def _mask_filename(mask_path): + # FIXME finish + if self.mask_prefix_levels: + levels = [c for c in mask_path[:self.mask_prefix_levels]] + mask_path = os.path.join(*levels, mask_path) + return mask_path + + def _load_img_info(csv_file, select_img_ids=None): + _logger.info('Read img_info csv...') + img_info_df = pd.read_csv(csv_file, index_col='id') + + _logger.info('Filter images...') + if select_img_ids is not None: + img_info_df = img_info_df.loc[select_img_ids] + img_info_df = img_info_df[ + (img_info_df['width'] >= self.min_img_size) & (img_info_df['height'] >= self.min_img_size)] + + _logger.info('Mapping ids...') + img_info_df['img_id'] = img_info_df.index + img_info_df['file_name'] = img_info_df.index.map(lambda x: _img_filename(x)) + img_info_df = img_info_df[['img_id', 'file_name', 'width', 'height']] + img_sizes = img_info_df[['width', 'height']].values + self.img_infos = img_info_df.to_dict('records') + self.img_ids = img_info_df.index.values.tolist() + img_id_to_idx = {img_id: idx for idx, img_id in enumerate(self.img_ids)} + return img_sizes, img_id_to_idx + + if self.include_masks and self.has_labels: + masks_df = pd.read_csv(masks_filename) + + # NOTE currently using dataset masks anno ImageIDs to form valid img_ids from the dataset + anno_img_ids = sorted(masks_df['ImageID'].unique()) + img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids) + + masks_df['ImageIdx'] = masks_df['ImageID'].map(img_id_to_idx) + if np.issubdtype(masks_df.ImageIdx.dtype, np.floating): + masks_df = masks_df.dropna(axis='rows') + masks_df['ImageIdx'] = masks_df.ImageIdx.astype(np.int32) + masks_df.sort_values('ImageIdx', inplace=True) + ann_img_idx = masks_df['ImageIdx'].values + img_sizes = img_sizes[ann_img_idx] + masks_df['BoxXMin'] = masks_df['BoxXMin'] * img_sizes[:, 0] + masks_df['BoxXMax'] = masks_df['BoxXMax'] * img_sizes[:, 0] + masks_df['BoxYMin'] = masks_df['BoxYMin'] * img_sizes[:, 1] + masks_df['BoxYMax'] = masks_df['BoxYMax'] * img_sizes[:, 1] + masks_df['LabelIdx'] = masks_df['LabelName'].map(self.cat_id_to_label) + # FIXME remap mask filename with _mask_filename + + self._anns = dict( + bbox=masks_df[['BoxXMin', 'BoxYMin', 'BoxXMax', 'BoxYMax']].values.astype(np.float32), + label=masks_df[['LabelIdx']].values.astype(np.int32), + mask_path=masks_df[['MaskPath']].values + ) + _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True) + self._img_to_ann = list(zip(ri, rc)) # index, count tuples + elif self.has_labels: + _logger.info('Loading bbox...') + bbox_df = pd.read_csv(bbox_filename) + + # NOTE 
currently using dataset box anno ImageIDs to form valid img_ids from the larger dataset. + # FIXME use *imagelabels.csv or imagelabels-boxable.csv for negative examples (without box?) + anno_img_ids = sorted(bbox_df['ImageID'].unique()) + img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids) + + _logger.info('Process bbox...') + bbox_df['ImageIdx'] = bbox_df['ImageID'].map(img_id_to_idx) + if np.issubdtype(bbox_df.ImageIdx.dtype, np.floating): + bbox_df = bbox_df.dropna(axis='rows') + bbox_df['ImageIdx'] = bbox_df.ImageIdx.astype(np.int32) + bbox_df.sort_values('ImageIdx', inplace=True) + ann_img_idx = bbox_df['ImageIdx'].values + img_sizes = img_sizes[ann_img_idx] + bbox_df['XMin'] = bbox_df['XMin'] * img_sizes[:, 0] + bbox_df['XMax'] = bbox_df['XMax'] * img_sizes[:, 0] + bbox_df['YMin'] = bbox_df['YMin'] * img_sizes[:, 1] + bbox_df['YMax'] = bbox_df['YMax'] * img_sizes[:, 1] + bbox_df['LabelIdx'] = bbox_df['LabelName'].map(self.cat_id_to_label).astype(np.int32) + + self._anns = dict( + bbox=bbox_df[['XMin', 'YMin', 'XMax', 'YMax']].values.astype(np.float32), + label=bbox_df[['LabelIdx', 'IsGroupOf']].values.astype(np.int32), + ) + _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True) + self._img_to_ann = list(zip(ri, rc)) # index, count tuples + else: + _load_img_info(img_info_filename) + + _logger.info('Annotations loaded!') + + def get_ann_info(self, idx): + if not self.has_labels: + return dict() + start_idx, num_ann = self._img_to_ann[idx] + ann_keys = tuple(self._anns.keys()) + ann_values = tuple(self._anns[k][start_idx:start_idx + num_ann] for k in ann_keys) + return self._parse_ann_info(idx, ann_keys, ann_values) + + def _parse_ann_info(self, img_idx, ann_keys, ann_values): + """ + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + if self.include_masks: + assert 'mask_path' in ann_keys + gt_masks = [] + + for ann in zip(*ann_values): + ann = dict(zip(ann_keys, ann)) + x1, y1, x2, y2 = ann['bbox'] + if x2 - x1 < 1 or y2 - y1 < 1: + continue + label = ann['label'][0] + iscrowd = False + if len(ann['label']) > 1: + iscrowd = ann['label'][1] + if self.yxyx: + bbox = np.array([y1, x1, y2, x2], dtype=np.float32) + else: + bbox = ann['bbox'] + if iscrowd: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(label) + # if self.include_masks: + # img_info = self.img_infos[img_idx] + # mask_img = SegmentationMask(ann['mask_filename'], img_info['width'], img_info['height']) + # gt_masks.append(mask_img) + + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, ndmin=2, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if self.include_bboxes_ignore: + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, ndmin=2, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict(bbox=gt_bboxes, cls=gt_labels) + + if self.include_bboxes_ignore: + ann.update(dict(bbox_ignore=gt_bboxes_ignore, cls_ignore=np.array([], dtype=np.int64))) + if self.include_masks: + ann['masks'] = gt_masks + return ann diff --git a/efficientdet/effdet/data/parsers/parser_voc.py b/efficientdet/effdet/data/parsers/parser_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..554d43315d7c56f6a073109eab7d4af06c1cc8a7 --- /dev/null +++ b/efficientdet/effdet/data/parsers/parser_voc.py @@ -0,0 +1,148 @@ +""" Pascal VOC dataset parser + +Copyright 
2020 Ross Wightman +""" +import os +import xml.etree.ElementTree as ET +from collections import defaultdict +import numpy as np + +from .parser import Parser +from .parser_config import VocParserCfg + + +class VocParser(Parser): + + DEFAULT_CLASSES = ( + 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', + 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', + 'sheep', 'sofa', 'train', 'tvmonitor') + + def __init__(self, cfg: VocParserCfg): + super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=False, # FIXME to support someday + include_bboxes_ignore=False, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.correct_bbox = 1 + self.keep_difficult = cfg.keep_difficult + + self.anns = None + self.img_id_to_idx = {} + self._load_annotations( + split_filename=cfg.split_filename, + img_filename=cfg.img_filename, + ann_filename=cfg.ann_filename, + classes=cfg.classes, + ) + + def _load_annotations( + self, + split_filename: str, + img_filename: str, + ann_filename: str, + classes=None, + ): + classes = classes or self.DEFAULT_CLASSES + self.cat_names = list(classes) + self.cat_ids = self.cat_names + self.cat_id_to_label = {cat: i + self.label_offset for i, cat in enumerate(self.cat_ids)} + + self.anns = [] + + with open(split_filename) as f: + ids = f.readlines() + for img_id in ids: + img_id = img_id.strip("\n") + filename = img_filename % img_id + xml_path = ann_filename % img_id + tree = ET.parse(xml_path) + root = tree.getroot() + size = root.find('size') + width = int(size.find('width').text) + height = int(size.find('height').text) + if min(width, height) < self.min_img_size: + continue + + anns = [] + for obj_idx, obj in enumerate(root.findall('object')): + name = obj.find('name').text + label = self.cat_id_to_label[name] + difficult = int(obj.find('difficult').text) + bnd_box = obj.find('bndbox') + bbox = [ + int(bnd_box.find('xmin').text), + int(bnd_box.find('ymin').text), + int(bnd_box.find('xmax').text), + int(bnd_box.find('ymax').text) + ] + anns.append(dict(label=label, bbox=bbox, difficult=difficult)) + + if not self.ignore_empty_gt or len(anns): + self.anns.append(anns) + self.img_infos.append(dict(id=img_id, file_name=filename, width=width, height=height)) + self.img_ids.append(img_id) + else: + self.img_ids_invalid.append(img_id) + + def merge(self, other): + assert len(self.cat_ids) == len(other.cat_ids) + self.img_ids.extend(other.img_ids) + self.img_infos.extend(other.img_infos) + self.anns.extend(other.anns) + + def get_ann_info(self, idx): + return self._parse_ann_info(self.anns[idx]) + + def _parse_ann_info(self, ann_info): + bboxes = [] + labels = [] + bboxes_ignore = [] + labels_ignore = [] + for ann in ann_info: + ignore = False + x1, y1, x2, y2 = ann['bbox'] + label = ann['label'] + w = x2 - x1 + h = y2 - y1 + if w < 1 or h < 1: + ignore = True + if self.yxyx: + bbox = [y1, x1, y2, x2] + else: + bbox = ann['bbox'] + if ignore or (ann['difficult'] and not self.keep_difficult): + bboxes_ignore.append(bbox) + labels_ignore.append(label) + else: + bboxes.append(bbox) + labels.append(label) + + if not bboxes: + bboxes = np.zeros((0, 4), dtype=np.float32) + labels = np.zeros((0, ), dtype=np.float32) + else: + bboxes = np.array(bboxes, ndmin=2, dtype=np.float32) - self.correct_bbox + labels = np.array(labels, dtype=np.float32) + + if self.include_bboxes_ignore: + if not bboxes_ignore: + bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + 
labels_ignore = np.zeros((0, ), dtype=np.float32) + else: + bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32) - self.correct_bbox + labels_ignore = np.array(labels_ignore, dtype=np.float32) + + ann = dict( + bbox=bboxes.astype(np.float32), + cls=labels.astype(np.int64)) + + if self.include_bboxes_ignore: + ann.update(dict( + bbox_ignore=bboxes_ignore.astype(np.float32), + cls_ignore=labels_ignore.astype(np.int64))) + return ann + diff --git a/efficientdet/effdet/data/random_erasing.py b/efficientdet/effdet/data/random_erasing.py new file mode 100644 index 0000000000000000000000000000000000000000..ded751ecf0b22a106f5eccd78fdc4fe3b83a44fd --- /dev/null +++ b/efficientdet/effdet/data/random_erasing.py @@ -0,0 +1,94 @@ +""" Multi-Scale RandomErasing + +Copyright 2020 Ross Wightman +""" +import random +import math +import torch + + +def _get_pixels(per_pixel, rand_color, patch_size, dtype=torch.float32, device='cuda'): + # NOTE I've seen CUDA illegal memory access errors being caused by the normal_() + # paths, flip the order so normal is run on CPU if this becomes a problem + # Issue has been fixed in master https://github.com/pytorch/pytorch/issues/19508 + if per_pixel: + return torch.empty(patch_size, dtype=dtype, device=device).normal_() + elif rand_color: + return torch.empty((patch_size[0], 1, 1), dtype=dtype, device=device).normal_() + else: + return torch.zeros((patch_size[0], 1, 1), dtype=dtype, device=device) + + +class RandomErasing: + """ Randomly selects a rectangle region in an image and erases its pixels. + 'Random Erasing Data Augmentation' by Zhong et al. + See https://arxiv.org/pdf/1708.04896.pdf + + This variant of RandomErasing is tweaked for multi-scale obj detection training. + Args: + probability: Probability that the Random Erasing operation will be performed. + min_area: Minimum percentage of erased area wrt input image area. + max_area: Maximum percentage of erased area wrt input image area. + min_aspect: Minimum aspect ratio of erased area. + mode: pixel color mode, one of 'const', 'rand', or 'pixel' + 'const' - erase block is constant color of 0 for all channels + 'rand' - erase block is same per-channel random (normal) color + 'pixel' - erase block is per-pixel random (normal) color + max_count: maximum number of erasing blocks per image, area per box is scaled by count. + per-image count is randomly chosen between 1 and this value. 
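+
+    A rough usage sketch (assuming images is the normalized NCHW CUDA batch and target is
+    the dict produced by the detection collate/prefetch pipeline, which carries the
+    'img_scale' and 'img_size' entries used below):
+
+        erase = RandomErasing(probability=0.3, mode='pixel', max_count=2, device='cuda')
+        images = erase(images, target)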
+ """ + + def __init__( + self, + probability=0.5, min_area=0.02, max_area=1/4, min_aspect=0.3, max_aspect=None, + mode='const', min_count=1, max_count=None, num_splits=0, device='cuda'): + self.probability = probability + self.min_area = min_area + self.max_area = max_area + max_aspect = max_aspect or 1 / min_aspect + self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect)) + self.min_count = min_count + self.max_count = max_count or min_count + self.num_splits = num_splits + mode = mode.lower() + self.rand_color = False + self.per_pixel = False + if mode == 'rand': + self.rand_color = True # per block random normal + elif mode == 'pixel': + self.per_pixel = True # per pixel random normal + else: + assert not mode or mode == 'const' + self.device = device + + def _erase(self, img, chan, img_h, img_w, dtype): + if random.random() > self.probability: + return + area = img_h * img_w + count = self.min_count if self.min_count == self.max_count else \ + random.randint(self.min_count, self.max_count) + for _ in range(count): + for attempt in range(10): + target_area = random.uniform(self.min_area, self.max_area) * area / count + aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio)) + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + if w < img_w and h < img_h: + top = random.randint(0, img_h - h) + left = random.randint(0, img_w - w) + img[:, top:top + h, left:left + w] = _get_pixels( + self.per_pixel, self.rand_color, (chan, h, w), + dtype=dtype, device=self.device) + break + + def __call__(self, input, target): + batch_size, chan, input_h, input_w = input.shape + img_scales = target['img_scale'] + img_size = (target['img_size'] / img_scales.unsqueeze(1)).int() + img_size[:, 0] = img_size[:, 0].clamp(max=input_w) + img_size[:, 1] = img_size[:, 1].clamp(max=input_h) + # skip first slice of batch if num_splits is set (for clean portion of samples) + batch_start = batch_size // self.num_splits if self.num_splits > 1 else 0 + for i in range(batch_start, batch_size): + self._erase(input[i], chan, img_size[i, 1], img_size[i, 0], input.dtype) + return input diff --git a/efficientdet/effdet/data/transforms.py b/efficientdet/effdet/data/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..c62b7727a4395f23927700efef6d59c1d36af2c5 --- /dev/null +++ b/efficientdet/effdet/data/transforms.py @@ -0,0 +1,275 @@ +""" COCO transforms (quick and dirty) + +Hacked together by Ross Wightman +""" +import torch +from PIL import Image +import numpy as np +import random +import math + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) +IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) +IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) +IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5) + + +class ImageToNumpy: + + def __call__(self, pil_img, annotations: dict): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW + return np_img, annotations + + +class ImageToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img, annotations: dict): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype), annotations + + +def _pil_interp(method): + if method == 'bicubic': + return Image.BICUBIC + elif method == 'lanczos': + return 
Image.LANCZOS + elif method == 'hamming': + return Image.HAMMING + else: + # default bilinear, do we want to allow nearest? + return Image.BILINEAR + + +def clip_boxes_(boxes, img_size): + height, width = img_size + clip_upper = np.array([height, width] * 2, dtype=boxes.dtype) + np.clip(boxes, 0, clip_upper, out=boxes) + + +def clip_boxes(boxes, img_size): + clipped_boxes = boxes.copy() + clip_boxes_(clipped_boxes, img_size) + return clipped_boxes + + +def _size_tuple(size): + if isinstance(size, int): + return size, size + else: + assert len(size) == 2 + return size + + +class ResizePad: + + def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): + self.target_size = _size_tuple(target_size) + self.interpolation = interpolation + self.fill_color = fill_color + + def __call__(self, img, anno: dict): + width, height = img.size + + img_scale_y = self.target_size[0] / height + img_scale_x = self.target_size[1] / width + img_scale = min(img_scale_y, img_scale_x) + scaled_h = int(height * img_scale) + scaled_w = int(width * img_scale) + + new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color) + interp_method = _pil_interp(self.interpolation) + img = img.resize((scaled_w, scaled_h), interp_method) + new_img.paste(img) + + if 'bbox' in anno: + # FIXME haven't tested this path since not currently using dataset annotations for train/eval + bbox = anno['bbox'] + bbox[:, :4] *= img_scale + clip_boxes_(bbox, (scaled_h, scaled_w)) + valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1) + anno['bbox'] = bbox[valid_indices, :] + anno['cls'] = anno['cls'][valid_indices] + + anno['img_scale'] = 1. / img_scale # back to original + + return new_img, anno + + +class RandomResizePad: + + def __init__(self, target_size: int, scale: tuple = (0.1, 2.0), interpolation: str = 'bilinear', + fill_color: tuple = (0, 0, 0)): + self.target_size = _size_tuple(target_size) + self.scale = scale + self.interpolation = interpolation + self.fill_color = fill_color + + def _get_params(self, img): + # Select a random scale factor. + scale_factor = random.uniform(*self.scale) + scaled_target_height = scale_factor * self.target_size[0] + scaled_target_width = scale_factor * self.target_size[1] + + # Recompute the accurate scale_factor using rounded scaled image size. 
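+        # Taking the min of the two ratios keeps the resized image inside the scaled
+        # target box; when it still exceeds the real target size, the random offsets
+        # computed below choose the crop window.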
+ width, height = img.size + img_scale_y = scaled_target_height / height + img_scale_x = scaled_target_width / width + img_scale = min(img_scale_y, img_scale_x) + + # Select non-zero random offset (x, y) if scaled image is larger than target size + scaled_h = int(height * img_scale) + scaled_w = int(width * img_scale) + offset_y = scaled_h - self.target_size[0] + offset_x = scaled_w - self.target_size[1] + offset_y = int(max(0.0, float(offset_y)) * random.uniform(0, 1)) + offset_x = int(max(0.0, float(offset_x)) * random.uniform(0, 1)) + return scaled_h, scaled_w, offset_y, offset_x, img_scale + + def __call__(self, img, anno: dict): + scaled_h, scaled_w, offset_y, offset_x, img_scale = self._get_params(img) + + interp_method = _pil_interp(self.interpolation) + img = img.resize((scaled_w, scaled_h), interp_method) + right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0]) + img = img.crop((offset_x, offset_y, right, lower)) + new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color) + new_img.paste(img) + + if 'bbox' in anno: + # FIXME not fully tested + bbox = anno['bbox'].copy() # FIXME copy for debugger inspection, back to inplace + bbox[:, :4] *= img_scale + box_offset = np.stack([offset_y, offset_x] * 2) + bbox -= box_offset + clip_boxes_(bbox, (scaled_h, scaled_w)) + valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1) + anno['bbox'] = bbox[valid_indices, :] + anno['cls'] = anno['cls'][valid_indices] + + anno['img_scale'] = 1. / img_scale # back to original + + return new_img, anno + + +class RandomFlip: + + def __init__(self, horizontal=True, vertical=False, prob=0.5): + self.horizontal = horizontal + self.vertical = vertical + self.prob = prob + + def _get_params(self): + do_horizontal = random.random() < self.prob if self.horizontal else False + do_vertical = random.random() < self.prob if self.vertical else False + return do_horizontal, do_vertical + + def __call__(self, img, annotations: dict): + do_horizontal, do_vertical = self._get_params() + width, height = img.size + + def _fliph(bbox): + x_max = width - bbox[:, 1] + x_min = width - bbox[:, 3] + bbox[:, 1] = x_min + bbox[:, 3] = x_max + + def _flipv(bbox): + y_max = height - bbox[:, 0] + y_min = height - bbox[:, 2] + bbox[:, 0] = y_min + bbox[:, 2] = y_max + + if do_horizontal and do_vertical: + img = img.transpose(Image.ROTATE_180) + if 'bbox' in annotations: + _fliph(annotations['bbox']) + _flipv(annotations['bbox']) + elif do_horizontal: + img = img.transpose(Image.FLIP_LEFT_RIGHT) + if 'bbox' in annotations: + _fliph(annotations['bbox']) + elif do_vertical: + img = img.transpose(Image.FLIP_TOP_BOTTOM) + if 'bbox' in annotations: + _flipv(annotations['bbox']) + + return img, annotations + + +def resolve_fill_color(fill_color, img_mean=IMAGENET_DEFAULT_MEAN): + if isinstance(fill_color, tuple): + assert len(fill_color) == 3 + fill_color = fill_color + else: + try: + int_color = int(fill_color) + fill_color = (int_color,) * 3 + except ValueError: + assert fill_color == 'mean' + fill_color = tuple([int(round(255 * x)) for x in img_mean]) + return fill_color + + +class Compose: + + def __init__(self, transforms: list): + self.transforms = transforms + + def __call__(self, img, annotations: dict): + for t in self.transforms: + img, annotations = t(img, annotations) + return img, annotations + + +def transforms_coco_eval( + img_size=224, + interpolation='bilinear', + use_prefetcher=False, + fill_color='mean', + 
mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD): + + fill_color = resolve_fill_color(fill_color, mean) + + image_tfl = [ + ResizePad( + target_size=img_size, interpolation=interpolation, fill_color=fill_color), + ImageToNumpy(), + ] + + assert use_prefetcher, "Only supporting prefetcher usage right now" + + image_tf = Compose(image_tfl) + return image_tf + + +def transforms_coco_train( + img_size=224, + interpolation='random', + use_prefetcher=False, + fill_color='mean', + mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD): + + fill_color = resolve_fill_color(fill_color, mean) + + image_tfl = [ + RandomFlip(horizontal=True, prob=0.5), + RandomResizePad( + target_size=img_size, interpolation=interpolation, fill_color=fill_color), + ImageToNumpy(), + ] + + assert use_prefetcher, "Only supporting prefetcher usage right now" + + image_tf = Compose(image_tfl) + return image_tf diff --git a/efficientdet/effdet/data/transforms_albumentation.py b/efficientdet/effdet/data/transforms_albumentation.py new file mode 100755 index 0000000000000000000000000000000000000000..446bf27a49d4a7b87ef96937da1e0951342c6c76 --- /dev/null +++ b/efficientdet/effdet/data/transforms_albumentation.py @@ -0,0 +1,23 @@ +import albumentations as A + +from albumentations.augmentations.transforms import ( + RandomBrightness, Downscale, RandomFog, RandomRain, RandomSnow) + +from albumentations.augmentations.blur.transforms import Blur + +def get_transform(): + transforms = A.Compose([ + #HorizontalFlip(p=0.5), + #VerticalFlip(p=0.5), + #RandomSizedBBoxSafeCrop(700, 700, erosion_rate=0.0, interpolation=1, always_apply=False, p=0.5), + Blur(blur_limit=7, always_apply=False, p=0.5), + RandomBrightness(limit=0.2, always_apply=False, p=0.5), + #Downscale(scale_min=0.5, scale_max=0.9, interpolation=0, always_apply=False, p=0.5), + #PadIfNeeded(min_height=1024, min_width=1024, pad_height_divisor=None, pad_width_divisor=None, border_mode=4, value=None, mask_value=None, always_apply=False, p=1.0), + #RandomFog(fog_coef_lower=0.3, fog_coef_upper=1, alpha_coef=0.08, always_apply=False, p=0.2), + #RandomRain(slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), p=0.2), + #RandomSnow(snow_point_lower=0.1, snow_point_upper=0.3, brightness_coeff=2.5, always_apply=False, p=0.2) + ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['bbox_classes']) + ) + return transforms + \ No newline at end of file diff --git a/efficientdet/effdet/distributed.py b/efficientdet/effdet/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..63f024eb5cb9f5d209b39158d762d0da714d7a0a --- /dev/null +++ b/efficientdet/effdet/distributed.py @@ -0,0 +1,308 @@ +""" PyTorch distributed helpers + +Some of this lifted from Detectron2 with other fns added by myself. Some of the Detectron2 fns +were intended for use with GLOO PG. I am using NCCL here with default PG so not everything will work +as is -RW +""" +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". 
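+When no such launcher sets it, this stays None; get_local_rank() below asserts on it once
+a process group is initialized, while get_rank() and get_world_size() do not depend on it.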
+""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. 
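+            With the default NCCL backend this falls back to a cached gloo group, so the
+            pickled payload is staged through CPU byte tensors.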
+ Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + Returns: + a dict with the same keys as input_dict, after reduction. 
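+
+    A minimal sketch (assuming loss_cls and loss_box are scalar CUDA tensors):
+
+        reduced = reduce_dict({'loss_cls': loss_cls, 'loss_box': loss_box})
+        # on rank 0, reduced now holds the values averaged over all workers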
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +def all_gather_container(container, group=None, cat_dim=0): + group = group or dist.group.WORLD + world_size = dist.get_world_size(group) + + def _do_gather(tensor): + tensor_list = [torch.empty_like(tensor) for _ in range(world_size)] + dist.all_gather(tensor_list, tensor, group=group) + return torch.cat(tensor_list, dim=cat_dim) + + if isinstance(container, dict): + gathered = dict() + for k, v in container.items(): + v = _do_gather(v) + gathered[k] = v + return gathered + elif isinstance(container, (list, tuple)): + gathered = [_do_gather(v) for v in container] + if isinstance(container, tuple): + gathered = tuple(gathered) + return gathered + else: + # if not a dict, list, tuple, expect a singular tensor + assert isinstance(container, torch.Tensor) + return _do_gather(container) + + +def gather_container(container, dst, group=None, cat_dim=0): + group = group or dist.group.WORLD + world_size = dist.get_world_size(group) + this_rank = dist.get_rank(group) + + def _do_gather(tensor): + if this_rank == dst: + tensor_list = [torch.empty_like(tensor) for _ in range(world_size)] + else: + tensor_list = None + dist.gather(tensor, tensor_list, dst=dst, group=group) + return torch.cat(tensor_list, dim=cat_dim) + + if isinstance(container, dict): + gathered = dict() + for k, v in container.items(): + v = _do_gather(v) + gathered[k] = v + return gathered + elif isinstance(container, (list, tuple)): + gathered = [_do_gather(v) for v in container] + if isinstance(container, tuple): + gathered = tuple(gathered) + return gathered + else: + # if not a dict, list, tuple, expect a singular tensor + assert isinstance(container, torch.Tensor) + return _do_gather(container) diff --git a/efficientdet/effdet/efficientdet.py b/efficientdet/effdet/efficientdet.py new file mode 100644 index 0000000000000000000000000000000000000000..47dcdc5359b0038857aaf5061756c130e16cf57d --- /dev/null +++ b/efficientdet/effdet/efficientdet.py @@ -0,0 +1,557 @@ +""" PyTorch EfficientDet model + +Based on official Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet +Paper: https://arxiv.org/abs/1911.09070 + +Hacked together by Ross Wightman +""" +import torch +import torch.nn as nn +import logging +import math +from collections import OrderedDict +from typing import List, Callable +from functools import partial + + +from timm import create_model +from timm.models.layers import create_conv2d, drop_path, create_pool2d, Swish, get_act_layer +from .config import get_fpn_config, set_config_writeable, set_config_readonly + +_DEBUG = False + +_ACT_LAYER = Swish + + +class SequentialList(nn.Sequential): + """ This module exists to work around torchscript typing issues list -> list""" + def __init__(self, *args): + super(SequentialList, self).__init__(*args) + + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + for module in self: + x = module(x) + return x + + +class ConvBnAct2d(nn.Module): + def __init__(self, in_channels, 
out_channels, kernel_size, stride=1, dilation=1, padding='', bias=False, + norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER): + super(ConvBnAct2d, self).__init__() + self.conv = create_conv2d( + in_channels, out_channels, kernel_size, stride=stride, dilation=dilation, padding=padding, bias=bias) + self.bn = None if norm_layer is None else norm_layer(out_channels) + self.act = None if act_layer is None else act_layer(inplace=True) + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +class SeparableConv2d(nn.Module): + """ Separable Conv + """ + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, + channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER): + super(SeparableConv2d, self).__init__() + self.conv_dw = create_conv2d( + in_channels, int(in_channels * channel_multiplier), kernel_size, + stride=stride, dilation=dilation, padding=padding, depthwise=True) + + self.conv_pw = create_conv2d( + int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) + + self.bn = None if norm_layer is None else norm_layer(out_channels) + self.act = None if act_layer is None else act_layer(inplace=True) + + def forward(self, x): + x = self.conv_dw(x) + x = self.conv_pw(x) + if self.bn is not None: + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +class ResampleFeatureMap(nn.Sequential): + + def __init__(self, in_channels, out_channels, reduction_ratio=1., pad_type='', pooling_type='max', + norm_layer=nn.BatchNorm2d, apply_bn=False, conv_after_downsample=False, redundant_bias=False): + super(ResampleFeatureMap, self).__init__() + pooling_type = pooling_type or 'max' + self.in_channels = in_channels + self.out_channels = out_channels + self.reduction_ratio = reduction_ratio + self.conv_after_downsample = conv_after_downsample + + conv = None + if in_channels != out_channels: + conv = ConvBnAct2d( + in_channels, out_channels, kernel_size=1, padding=pad_type, + norm_layer=norm_layer if apply_bn else None, + bias=not apply_bn or redundant_bias, act_layer=None) + + if reduction_ratio > 1: + stride_size = int(reduction_ratio) + if conv is not None and not self.conv_after_downsample: + self.add_module('conv', conv) + self.add_module( + 'downsample', + create_pool2d( + pooling_type, kernel_size=stride_size + 1, stride=stride_size, padding=pad_type)) + if conv is not None and self.conv_after_downsample: + self.add_module('conv', conv) + else: + if conv is not None: + self.add_module('conv', conv) + if reduction_ratio < 1: + scale = int(1 // reduction_ratio) + self.add_module('upsample', nn.UpsamplingNearest2d(scale_factor=scale)) + + # def forward(self, x): + # # here for debugging only + # assert x.shape[1] == self.in_channels + # if self.reduction_ratio > 1: + # if hasattr(self, 'conv') and not self.conv_after_downsample: + # x = self.conv(x) + # x = self.downsample(x) + # if hasattr(self, 'conv') and self.conv_after_downsample: + # x = self.conv(x) + # else: + # if hasattr(self, 'conv'): + # x = self.conv(x) + # if self.reduction_ratio < 1: + # x = self.upsample(x) + # return x + + +class FpnCombine(nn.Module): + def __init__(self, feature_info, fpn_config, fpn_channels, inputs_offsets, target_reduction, pad_type='', + pooling_type='max', norm_layer=nn.BatchNorm2d, apply_bn_for_resampling=False, + conv_after_downsample=False, redundant_bias=False, 
weight_method='attn'): + super(FpnCombine, self).__init__() + self.inputs_offsets = inputs_offsets + self.weight_method = weight_method + + self.resample = nn.ModuleDict() + for idx, offset in enumerate(inputs_offsets): + in_channels = fpn_channels + if offset < len(feature_info): + in_channels = feature_info[offset]['num_chs'] + input_reduction = feature_info[offset]['reduction'] + else: + node_idx = offset - len(feature_info) + input_reduction = fpn_config.nodes[node_idx]['reduction'] + reduction_ratio = target_reduction / input_reduction + self.resample[str(offset)] = ResampleFeatureMap( + in_channels, fpn_channels, reduction_ratio=reduction_ratio, pad_type=pad_type, + pooling_type=pooling_type, norm_layer=norm_layer, apply_bn=apply_bn_for_resampling, + conv_after_downsample=conv_after_downsample, redundant_bias=redundant_bias) + + if weight_method == 'attn' or weight_method == 'fastattn': + self.edge_weights = nn.Parameter(torch.ones(len(inputs_offsets)), requires_grad=True) # WSM + else: + self.edge_weights = None + + def forward(self, x: List[torch.Tensor]): + dtype = x[0].dtype + nodes = [] + for offset, resample in zip(self.inputs_offsets, self.resample.values()): + input_node = x[offset] + input_node = resample(input_node) + nodes.append(input_node) + + if self.weight_method == 'attn': + normalized_weights = torch.softmax(self.edge_weights.to(dtype=dtype), dim=0) + out = torch.stack(nodes, dim=-1) * normalized_weights + elif self.weight_method == 'fastattn': + edge_weights = nn.functional.relu(self.edge_weights.to(dtype=dtype)) + weights_sum = torch.sum(edge_weights) + out = torch.stack( + [(nodes[i] * edge_weights[i]) / (weights_sum + 0.0001) for i in range(len(nodes))], dim=-1) + elif self.weight_method == 'sum': + out = torch.stack(nodes, dim=-1) + else: + raise ValueError('unknown weight_method {}'.format(self.weight_method)) + out = torch.sum(out, dim=-1) + return out + + +class Fnode(nn.Module): + """ A simple wrapper used in place of nn.Sequential for torchscript typing + Handles input type List[Tensor] -> output type Tensor + """ + def __init__(self, combine: nn.Module, after_combine: nn.Module): + super(Fnode, self).__init__() + self.combine = combine + self.after_combine = after_combine + + def forward(self, x: List[torch.Tensor]) -> torch.Tensor: + return self.after_combine(self.combine(x)) + + +class BiFpnLayer(nn.Module): + def __init__(self, feature_info, fpn_config, fpn_channels, num_levels=5, pad_type='', + pooling_type='max', norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER, + apply_bn_for_resampling=False, conv_after_downsample=True, conv_bn_relu_pattern=False, + separable_conv=True, redundant_bias=False): + super(BiFpnLayer, self).__init__() + self.num_levels = num_levels + self.conv_bn_relu_pattern = False + + self.feature_info = [] + self.fnode = nn.ModuleList() + for i, fnode_cfg in enumerate(fpn_config.nodes): + logging.debug('fnode {} : {}'.format(i, fnode_cfg)) + reduction = fnode_cfg['reduction'] + combine = FpnCombine( + feature_info, fpn_config, fpn_channels, tuple(fnode_cfg['inputs_offsets']), + target_reduction=reduction, pad_type=pad_type, pooling_type=pooling_type, norm_layer=norm_layer, + apply_bn_for_resampling=apply_bn_for_resampling, conv_after_downsample=conv_after_downsample, + redundant_bias=redundant_bias, weight_method=fnode_cfg['weight_method']) + + after_combine = nn.Sequential() + conv_kwargs = dict( + in_channels=fpn_channels, out_channels=fpn_channels, kernel_size=3, padding=pad_type, + bias=False, norm_layer=norm_layer, 
act_layer=act_layer) + if not conv_bn_relu_pattern: + conv_kwargs['bias'] = redundant_bias + conv_kwargs['act_layer'] = None + after_combine.add_module('act', act_layer(inplace=True)) + after_combine.add_module( + 'conv', SeparableConv2d(**conv_kwargs) if separable_conv else ConvBnAct2d(**conv_kwargs)) + + self.fnode.append(Fnode(combine=combine, after_combine=after_combine)) + self.feature_info.append(dict(num_chs=fpn_channels, reduction=reduction)) + + self.feature_info = self.feature_info[-num_levels::] + + def forward(self, x: List[torch.Tensor]): + for fn in self.fnode: + x.append(fn(x)) + return x[-self.num_levels::] + + +class BiFpn(nn.Module): + + def __init__(self, config, feature_info): + super(BiFpn, self).__init__() + self.num_levels = config.num_levels + norm_layer = config.norm_layer or nn.BatchNorm2d + if config.norm_kwargs: + norm_layer = partial(norm_layer, **config.norm_kwargs) + act_layer = get_act_layer(config.act_type) or _ACT_LAYER + fpn_config = config.fpn_config or get_fpn_config( + config.fpn_name, min_level=config.min_level, max_level=config.max_level) + + self.resample = nn.ModuleDict() + for level in range(config.num_levels): + if level < len(feature_info): + in_chs = feature_info[level]['num_chs'] + reduction = feature_info[level]['reduction'] + else: + # Adds a coarser level by downsampling the last feature map + reduction_ratio = 2 + self.resample[str(level)] = ResampleFeatureMap( + in_channels=in_chs, + out_channels=config.fpn_channels, + pad_type=config.pad_type, + pooling_type=config.pooling_type, + norm_layer=norm_layer, + reduction_ratio=reduction_ratio, + apply_bn=config.apply_bn_for_resampling, + conv_after_downsample=config.conv_after_downsample, + redundant_bias=config.redundant_bias, + ) + in_chs = config.fpn_channels + reduction = int(reduction * reduction_ratio) + feature_info.append(dict(num_chs=in_chs, reduction=reduction)) + + self.cell = SequentialList() + for rep in range(config.fpn_cell_repeats): + logging.debug('building cell {}'.format(rep)) + fpn_layer = BiFpnLayer( + feature_info=feature_info, + fpn_config=fpn_config, + fpn_channels=config.fpn_channels, + num_levels=config.num_levels, + pad_type=config.pad_type, + pooling_type=config.pooling_type, + norm_layer=norm_layer, + act_layer=act_layer, + separable_conv=config.separable_conv, + apply_bn_for_resampling=config.apply_bn_for_resampling, + conv_after_downsample=config.conv_after_downsample, + conv_bn_relu_pattern=config.conv_bn_relu_pattern, + redundant_bias=config.redundant_bias, + ) + self.cell.add_module(str(rep), fpn_layer) + feature_info = fpn_layer.feature_info + + def forward(self, x: List[torch.Tensor]): + for resample in self.resample.values(): + x.append(resample(x[-1])) + x = self.cell(x) + return x + + +class HeadNet(nn.Module): + + def __init__(self, config, num_outputs): + super(HeadNet, self).__init__() + self.num_levels = config.num_levels + self.bn_level_first = getattr(config, 'head_bn_level_first', False) + norm_layer = config.norm_layer or nn.BatchNorm2d + if config.norm_kwargs: + norm_layer = partial(norm_layer, **config.norm_kwargs) + act_layer = get_act_layer(config.act_type) or _ACT_LAYER + + # Build convolution repeats + conv_fn = SeparableConv2d if config.separable_conv else ConvBnAct2d + conv_kwargs = dict( + in_channels=config.fpn_channels, out_channels=config.fpn_channels, kernel_size=3, + padding=config.pad_type, bias=config.redundant_bias, act_layer=None, norm_layer=None) + self.conv_rep = nn.ModuleList([conv_fn(**conv_kwargs) for _ in 
range(config.box_class_repeats)]) + + # Build batchnorm repeats. There is a unique batchnorm per feature level for each repeat. + # This can be organized with repeats first or feature levels first in module lists, the original models + # and weights were setup with repeats first, levels first is required for efficient torchscript usage. + self.bn_rep = nn.ModuleList() + if self.bn_level_first: + for _ in range(self.num_levels): + self.bn_rep.append(nn.ModuleList([ + norm_layer(config.fpn_channels) for _ in range(config.box_class_repeats)])) + else: + for _ in range(config.box_class_repeats): + self.bn_rep.append(nn.ModuleList([ + nn.Sequential(OrderedDict([('bn', norm_layer(config.fpn_channels))])) + for _ in range(self.num_levels)])) + + self.act = act_layer(inplace=True) + + # Prediction (output) layer. Has bias with special init reqs, see init fn. + num_anchors = len(config.aspect_ratios) * config.num_scales + predict_kwargs = dict( + in_channels=config.fpn_channels, out_channels=num_outputs * num_anchors, kernel_size=3, + padding=config.pad_type, bias=True, norm_layer=None, act_layer=None) + self.predict = conv_fn(**predict_kwargs) + + @torch.jit.ignore() + def toggle_bn_level_first(self): + """ Toggle the batchnorm layers between feature level first vs repeat first access pattern + Limitations in torchscript require feature levels to be iterated over first. + + This function can be used to allow loading weights in the original order, and then toggle before + jit scripting the model. + """ + with torch.no_grad(): + new_bn_rep = nn.ModuleList() + for i in range(len(self.bn_rep[0])): + bn_first = nn.ModuleList() + for r in self.bn_rep.children(): + m = r[i] + # NOTE original rep first model def has extra Sequential container with 'bn', this was + # flattened in the level first definition. + bn_first.append(m[0] if isinstance(m, nn.Sequential) else nn.Sequential(OrderedDict([('bn', m)]))) + new_bn_rep.append(bn_first) + self.bn_level_first = not self.bn_level_first + self.bn_rep = new_bn_rep + + @torch.jit.ignore() + def _forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + outputs = [] + for level in range(self.num_levels): + x_level = x[level] + for conv, bn in zip(self.conv_rep, self.bn_rep): + x_level = conv(x_level) + x_level = bn[level](x_level) # this is not allowed in torchscript + x_level = self.act(x_level) + outputs.append(self.predict(x_level)) + return outputs + + def _forward_level_first(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + outputs = [] + for level, bn_rep in enumerate(self.bn_rep): # iterating over first bn dim first makes TS happy + x_level = x[level] + for conv, bn in zip(self.conv_rep, bn_rep): + x_level = conv(x_level) + x_level = bn(x_level) + x_level = self.act(x_level) + outputs.append(self.predict(x_level)) + return outputs + + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + if self.bn_level_first: + return self._forward_level_first(x) + else: + return self._forward(x) + + +def _init_weight(m, n='', ): + """ Weight initialization as per Tensorflow official implementations. 
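+
+    Summary of the scheme implemented below: separable convs in the box/class heads use
+    fan-in variance scaling and plain convs there use a normal(0, 0.01) init; all other
+    (non-head) convs use glorot/xavier uniform. The class-net prediction bias is filled
+    with -log((1 - 0.01) / 0.01) so the initial predicted foreground probability is
+    roughly 0.01 (the RetinaNet prior).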
+ """ + + def _fan_in_out(w, groups=1): + dimensions = w.dim() + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + num_input_fmaps = w.size(1) + num_output_fmaps = w.size(0) + receptive_field_size = 1 + if w.dim() > 2: + receptive_field_size = w[0][0].numel() + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + fan_out //= groups + return fan_in, fan_out + + def _glorot_uniform(w, gain=1, groups=1): + fan_in, fan_out = _fan_in_out(w, groups) + gain /= max(1., (fan_in + fan_out) / 2.) # fan avg + limit = math.sqrt(3.0 * gain) + w.data.uniform_(-limit, limit) + + def _variance_scaling(w, gain=1, groups=1): + fan_in, fan_out = _fan_in_out(w, groups) + gain /= max(1., fan_in) # fan in + # gain /= max(1., (fan_in + fan_out) / 2.) # fan + + # should it be normal or trunc normal? using normal for now since no good trunc in PT + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + # std = math.sqrt(gain) / .87962566103423978 + # w.data.trunc_normal(std=std) + std = math.sqrt(gain) + w.data.normal_(std=std) + + if isinstance(m, SeparableConv2d): + if 'box_net' in n or 'class_net' in n: + _variance_scaling(m.conv_dw.weight, groups=m.conv_dw.groups) + _variance_scaling(m.conv_pw.weight) + if m.conv_pw.bias is not None: + if 'class_net.predict' in n: + m.conv_pw.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.conv_pw.bias.data.zero_() + else: + _glorot_uniform(m.conv_dw.weight, groups=m.conv_dw.groups) + _glorot_uniform(m.conv_pw.weight) + if m.conv_pw.bias is not None: + m.conv_pw.bias.data.zero_() + elif isinstance(m, ConvBnAct2d): + if 'box_net' in n or 'class_net' in n: + m.conv.weight.data.normal_(std=.01) + if m.conv.bias is not None: + if 'class_net.predict' in n: + m.conv.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.conv.bias.data.zero_() + else: + _glorot_uniform(m.conv.weight) + if m.conv.bias is not None: + m.conv.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + # looks like all bn init the same? 
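+        # reset batchnorm affine parameters to identity: weight=1, bias=0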
+ m.weight.data.fill_(1.0) + m.bias.data.zero_() + + +def _init_weight_alt(m, n='', ): + """ Weight initialization alternative, based on EfficientNet bacbkone init w/ class bias addition + NOTE: this will likely be removed after some experimentation + """ + if isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + if 'class_net.predict' in n: + m.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + + +def get_feature_info(backbone): + if isinstance(backbone.feature_info, Callable): + # old accessor for timm versions <= 0.1.30, efficientnet and mobilenetv3 and related nets only + feature_info = [dict(num_chs=f['num_chs'], reduction=f['reduction']) + for i, f in enumerate(backbone.feature_info())] + else: + # new feature info accessor, timm >= 0.2, all models supported + feature_info = backbone.feature_info.get_dicts(keys=['num_chs', 'reduction']) + return feature_info + + +class EfficientDet(nn.Module): + + def __init__(self, config, pretrained_backbone=True, alternate_init=False): + super(EfficientDet, self).__init__() + self.config = config + set_config_readonly(self.config) + self.backbone = create_model( + config.backbone_name, features_only=True, out_indices=(2, 3, 4), + pretrained=pretrained_backbone, **config.backbone_args) + feature_info = get_feature_info(self.backbone) + self.fpn = BiFpn(self.config, feature_info) + self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes) + self.box_net = HeadNet(self.config, num_outputs=4) + + for n, m in self.named_modules(): + if 'backbone' not in n: + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + @torch.jit.ignore() + def reset_head(self, num_classes=None, aspect_ratios=None, num_scales=None, alternate_init=False): + reset_class_head = False + reset_box_head = False + set_config_writeable(self.config) + if num_classes is not None: + reset_class_head = True + self.config.num_classes = num_classes + if aspect_ratios is not None: + reset_box_head = True + self.config.aspect_ratios = aspect_ratios + if num_scales is not None: + reset_box_head = True + self.config.num_scales = num_scales + set_config_readonly(self.config) + + if reset_class_head: + self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes) + for n, m in self.class_net.named_modules(prefix='class_net'): + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + if reset_box_head: + self.box_net = HeadNet(self.config, num_outputs=4) + for n, m in self.box_net.named_modules(prefix='box_net'): + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + @torch.jit.ignore() + def toggle_head_bn_level_first(self): + """ Toggle the head batchnorm layers between being access with feature_level first vs repeat + """ + self.class_net.toggle_bn_level_first() + self.box_net.toggle_bn_level_first() + + def forward(self, x): + x = self.backbone(x) + x = self.fpn(x) + x_class = self.class_net(x) + x_box = self.box_net(x) + return x_class, x_box diff --git a/efficientdet/effdet/evaluation/README.md b/efficientdet/effdet/evaluation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3546caa9900f652968bffb7bb8593a1cdd824228 --- /dev/null +++ b/efficientdet/effdet/evaluation/README.md @@ -0,0 +1,7 @@ +# Tensorflow Models 
Evaluation
+
+The code in this folder has been extracted and adapted from evaluation/evaluator code at https://github.com/tensorflow/models/tree/master/research/object_detection/utils
+
+Original code is licensed Apache 2.0, Copyright Google Inc.
+https://github.com/tensorflow/models/blob/master/LICENSE
+ 
\ No newline at end of file
diff --git a/efficientdet/effdet/evaluation/__init__.py b/efficientdet/effdet/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/efficientdet/effdet/evaluation/detection_evaluator.py b/efficientdet/effdet/evaluation/detection_evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..385204cd088544592d4291de9c1436f620335162
--- /dev/null
+++ b/efficientdet/effdet/evaluation/detection_evaluator.py
@@ -0,0 +1,590 @@
+from abc import ABCMeta
+from abc import abstractmethod
+#import collections
+import logging
+import unicodedata
+import numpy as np
+
+from .fields import InputDataFields, DetectionResultFields
+from .object_detection_evaluation import ObjectDetectionEvaluation
+
+
+def create_category_index(categories):
+    """Creates dictionary of COCO compatible categories keyed by category id.
+    Args:
+        categories: a list of dicts, each of which has the following keys:
+            'id': (required) an integer id uniquely identifying this category.
+            'name': (required) string representing category name e.g., 'cat', 'dog', 'pizza'.
+    Returns:
+        category_index: a dict containing the same entries as categories, but keyed
+            by the 'id' field of each category.
+    """
+    category_index = {}
+    for cat in categories:
+        category_index[cat['id']] = cat
+    return category_index
+
+
+class DetectionEvaluator(metaclass=ABCMeta):
+    """Interface for object detection evaluation classes.
+    Example usage of the Evaluator:
+    ------------------------------
+    evaluator = DetectionEvaluator(categories)
+    # Detections and groundtruth for image 1.
+    evaluator.add_single_ground_truth_image_info(...)
+    evaluator.add_single_detected_image_info(...)
+    # Detections and groundtruth for image 2.
+    evaluator.add_single_ground_truth_image_info(...)
+    evaluator.add_single_detected_image_info(...)
+    metrics_dict = evaluator.evaluate()
+    """
+
+    def __init__(self, categories):
+        """Constructor.
+        Args:
+            categories: A list of dicts, each of which has the following keys -
+                'id': (required) an integer id uniquely identifying this category.
+                'name': (required) string representing category name e.g., 'cat', 'dog'.
+        """
+        self._categories = categories
+
+    def observe_result_dict_for_single_example(self, eval_dict):
+        """Observes an evaluation result dict for a single example.
+        When executing eagerly, once all observations have been observed by this
+        method you can use `.evaluate()` to get the final metrics.
+        When using `tf.estimator.Estimator` for evaluation this function is used by
+        `get_estimator_eval_metric_ops()` to construct the metric update op.
+        Args:
+            eval_dict: A dictionary that holds tensors for evaluating an object
+                detection model, returned from
+                eval_util.result_dict_for_single_example().
+        Returns:
+            None when executing eagerly, or an update_op that can be used to update
+            the eval metrics in `tf.estimator.EstimatorSpec`.
+        """
+        raise NotImplementedError('Not implemented for this evaluator!')
+
+    @abstractmethod
+    def add_single_ground_truth_image_info(self, image_id, gt_dict):
+        """Adds groundtruth for a single image to be used for evaluation.
+ Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary of groundtruth numpy arrays required for evaluations. + """ + pass + + @abstractmethod + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary of detection numpy arrays required for evaluation. + """ + pass + + @abstractmethod + def evaluate(self): + """Evaluates detections and returns a dictionary of metrics.""" + pass + + @abstractmethod + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + pass + + +class ObjectDetectionEvaluator(DetectionEvaluator): + """A class to evaluation detections.""" + + def __init__(self, + categories, + matching_iou_threshold=0.5, + recall_lower_bound=0.0, + recall_upper_bound=1.0, + evaluate_corlocs=False, + evaluate_precision_recall=False, + metric_prefix=None, + use_weighted_mean_ap=False, + evaluate_masks=False, + group_of_weight=0.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + matching_iou_threshold: IOU threshold to use for matching groundtruth boxes to detection boxes. + recall_lower_bound: lower bound of recall operating area. + recall_upper_bound: upper bound of recall operating area. + evaluate_corlocs: (optional) boolean which determines if corloc scores are to be returned or not. + evaluate_precision_recall: (optional) boolean which determines if + precision and recall values are to be returned or not. + metric_prefix: (optional) string prefix for metric name; if None, no prefix is used. + use_weighted_mean_ap: (optional) boolean which determines if the mean + average precision is computed directly from the scores and tp_fp_labels of all classes. + evaluate_masks: If False, evaluation will be performed based on boxes. If + True, mask evaluation will be performed instead. + group_of_weight: Weight of group-of boxes.If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives. + Raises: + ValueError: If the category ids are not 1-indexed. 
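+
+        Example (hypothetical categories, shown only as a usage sketch):
+            categories = [{'id': 1, 'name': 'plastic'}, {'id': 2, 'name': 'glass'}]
+            evaluator = ObjectDetectionEvaluator(categories, matching_iou_threshold=0.5)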
+ """ + super(ObjectDetectionEvaluator, self).__init__(categories) + self._num_classes = max([cat['id'] for cat in categories]) + if min(cat['id'] for cat in categories) < 1: + raise ValueError('Classes should be 1-indexed.') + self._matching_iou_threshold = matching_iou_threshold + self._recall_lower_bound = recall_lower_bound + self._recall_upper_bound = recall_upper_bound + self._use_weighted_mean_ap = use_weighted_mean_ap + self._label_id_offset = 1 + self._evaluate_masks = evaluate_masks + self._group_of_weight = group_of_weight + self._evaluation = ObjectDetectionEvaluation( + num_gt_classes=self._num_classes, + matching_iou_threshold=self._matching_iou_threshold, + recall_lower_bound=self._recall_lower_bound, + recall_upper_bound=self._recall_upper_bound, + use_weighted_mean_ap=self._use_weighted_mean_ap, + label_id_offset=self._label_id_offset, + group_of_weight=self._group_of_weight) + self._image_ids = set([]) + self._evaluate_corlocs = evaluate_corlocs + self._evaluate_precision_recall = evaluate_precision_recall + self._metric_prefix = (metric_prefix + '_') if metric_prefix else '' + self._build_metric_names() + + def _build_metric_names(self): + """Builds a list with metric names.""" + if self._recall_lower_bound > 0.0 or self._recall_upper_bound < 1.0: + self._metric_names = [ + self._metric_prefix + 'Precision/mAP@{}IOU@[{:.1f},{:.1f}]Recall'.format( + self._matching_iou_threshold, self._recall_lower_bound, self._recall_upper_bound) + ] + else: + self._metric_names = [ + self._metric_prefix + 'Precision/mAP@{}IOU'.format(self._matching_iou_threshold) + ] + if self._evaluate_corlocs: + self._metric_names.append( + self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold)) + + category_index = create_category_index(self._categories) + for idx in range(self._num_classes): + if idx + self._label_id_offset in category_index: + category_name = category_index[idx + self._label_id_offset]['name'] + category_name = unicodedata.normalize('NFKD', category_name) + self._metric_names.append( + self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format( + self._matching_iou_threshold, category_name)) + if self._evaluate_corlocs: + self._metric_names.append( + self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format( + self._matching_iou_threshold, category_name)) + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array + of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of + the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array + of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_difficult: Optional length M numpy boolean array + denoting whether a ground truth box is a difficult instance or not. + This field is optional to support the case that no boxes are difficult. + InputDataFields.gt_instance_masks: Optional numpy array of shape + [num_boxes, height, width] with values in {0, 1}. + Raises: + ValueError: On adding groundtruth for an image more than once. Will also + raise error if instance masks are not in groundtruth dictionary. 
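+
+        Example (hypothetical values; the field name constants come from fields.py):
+            gt_dict = {
+                InputDataFields.gt_boxes: np.array([[10., 10., 120., 200.]], dtype=np.float32),
+                InputDataFields.gt_classes: np.array([1]),
+            }
+            evaluator.add_single_ground_truth_image_info('image_0', gt_dict)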
+ """ + if image_id in self._image_ids: + return + + gt_classes = gt_dict[InputDataFields.gt_classes] - self._label_id_offset + # If the key is not present in the gt_dict or the array is empty + # (unless there are no annotations for the groundtruth on this image) + # use values from the dictionary or insert None otherwise. + if (InputDataFields.gt_difficult in gt_dict and + (gt_dict[InputDataFields.gt_difficult].size or not gt_classes.size)): + gt_difficult = gt_dict[InputDataFields.gt_difficult] + else: + gt_difficult = None + # FIXME disable difficult flag warning, will support flag eventually + # if not len(self._image_ids) % 1000: + # logging.warning('image %s does not have groundtruth difficult flag specified', image_id) + gt_masks = None + if self._evaluate_masks: + if InputDataFields.gt_instance_masks not in gt_dict: + raise ValueError('Instance masks not in groundtruth dictionary.') + gt_masks = gt_dict[InputDataFields.gt_instance_masks] + self._evaluation.add_single_ground_truth_image_info( + image_key=image_id, + gt_boxes=gt_dict[InputDataFields.gt_boxes], + gt_class_labels=gt_classes, + gt_is_difficult_list=gt_difficult, + gt_masks=gt_masks) + self._image_ids.update([image_id]) + + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_boxes: float32 numpy + array of shape [num_boxes, 4] containing `num_boxes` detection boxes + of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + DetectionResultFields.detection_scores: float32 numpy + array of shape [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy + array of shape [num_boxes] containing 1-indexed detection classes for the boxes. + DetectionResultFields.detection_masks: uint8 numpy array + of shape [num_boxes, height, width] containing `num_boxes` masks of + values ranging between 0 and 1. + Raises: + ValueError: If detection masks are not in detections dictionary. + """ + detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset + detection_masks = None + if self._evaluate_masks: + if DetectionResultFields.detection_masks not in detections_dict: + raise ValueError('Detection masks not in detections dictionary.') + detection_masks = detections_dict[DetectionResultFields.detection_masks] + self._evaluation.add_single_detected_image_info( + image_key=image_id, + detected_boxes=detections_dict[DetectionResultFields.detection_boxes], + detected_scores=detections_dict[DetectionResultFields.detection_scores], + detected_class_labels=detection_classes, + detected_masks=detection_masks) + + def evaluate(self): + """Compute evaluation result. + Returns: + A dictionary of metrics with the following fields - + 1. summary_metrics: + '_Precision/mAP@IOU': mean + average precision at the specified IOU threshold. + 2. per_category_ap: category specific results with keys of the form + '_PerformanceByCategory/ + mAP@IOU/category'. 
+        """
+        metrics = self._evaluation.evaluate()
+        pascal_metrics = {self._metric_names[0]: metrics['mean_ap']}
+        if self._evaluate_corlocs:
+            pascal_metrics[self._metric_names[1]] = metrics['mean_corloc']
+        category_index = create_category_index(self._categories)
+        for idx in range(metrics['per_class_ap'].size):
+            if idx + self._label_id_offset in category_index:
+                category_name = category_index[idx + self._label_id_offset]['name']
+                category_name = unicodedata.normalize('NFKD', category_name)
+                display_name = self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
+                    self._matching_iou_threshold, category_name)
+                pascal_metrics[display_name] = metrics['per_class_ap'][idx]
+
+                # Optionally add precision and recall values
+                if self._evaluate_precision_recall:
+                    display_name = self._metric_prefix + 'PerformanceByCategory/Precision@{}IOU/{}'.format(
+                        self._matching_iou_threshold, category_name)
+                    pascal_metrics[display_name] = metrics['per_class_precision'][idx]
+                    display_name = self._metric_prefix + 'PerformanceByCategory/Recall@{}IOU/{}'.format(
+                        self._matching_iou_threshold, category_name)
+                    pascal_metrics[display_name] = metrics['per_class_recall'][idx]
+
+                # Optionally add CorLoc metrics
+                if self._evaluate_corlocs:
+                    display_name = self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
+                        self._matching_iou_threshold, category_name)
+                    pascal_metrics[display_name] = metrics['per_class_corloc'][idx]
+
+        return pascal_metrics
+
+    def clear(self):
+        """Clears the state to prepare for a fresh evaluation."""
+        self._evaluation = ObjectDetectionEvaluation(
+            num_gt_classes=self._num_classes,
+            matching_iou_threshold=self._matching_iou_threshold,
+            use_weighted_mean_ap=self._use_weighted_mean_ap,
+            label_id_offset=self._label_id_offset)
+        self._image_ids.clear()
+
+
+class PascalDetectionEvaluator(ObjectDetectionEvaluator):
+    """A class to evaluate detections using PASCAL metrics."""
+
+    def __init__(self, categories, matching_iou_threshold=0.5):
+        super(PascalDetectionEvaluator, self).__init__(
+            categories,
+            matching_iou_threshold=matching_iou_threshold,
+            evaluate_corlocs=False,
+            metric_prefix='PascalBoxes',
+            use_weighted_mean_ap=False)
+
+
+class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator):
+    """A class to evaluate detections using weighted PASCAL metrics.
+    Weighted PASCAL metrics computes the mean average precision as the average
+    precision given the scores and tp_fp_labels of all classes. In comparison,
+    PASCAL metrics computes the mean average precision as the mean of the
+    per-class average precisions.
+    This definition is very similar to the mean of the per-class average
+    precisions weighted by class frequency. However, they are typically not the
+    same as the average precision is not a linear function of the scores and
+    tp_fp_labels.
+ """ + + def __init__(self, categories, matching_iou_threshold=0.5): + super(WeightedPascalDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold=matching_iou_threshold, + evaluate_corlocs=False, + metric_prefix='WeightedPascalBoxes', + use_weighted_mean_ap=True) + + +class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluation detections using precision@recall metrics.""" + + def __init__(self, + categories, + matching_iou_threshold=0.5, + recall_lower_bound=0., + recall_upper_bound=1.0): + super(PrecisionAtRecallDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold=matching_iou_threshold, + recall_lower_bound=recall_lower_bound, + recall_upper_bound=recall_upper_bound, + evaluate_corlocs=False, + metric_prefix='PrecisionAtRecallBoxes', + use_weighted_mean_ap=False) + + +class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluation detections using Open Images V2 metrics. + Open Images V2 introduce group_of type of bounding boxes and this metric + handles those boxes appropriately. + """ + + def __init__(self, + categories, + matching_iou_threshold=0.5, + evaluate_masks=False, + evaluate_corlocs=False, + metric_prefix='OpenImagesV5', + group_of_weight=0.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + matching_iou_threshold: IOU threshold to use for matching groundtruth + boxes to detection boxes. + evaluate_masks: if True, evaluator evaluates masks. + evaluate_corlocs: if True, additionally evaluates and returns CorLoc. + metric_prefix: Prefix name of the metric. + group_of_weight: Weight of the group-of bounding box. If set to 0 (default + for Open Images V2 detection protocol), detections of the correct class + within a group-of box are ignored. If weight is > 0, then if at least + one detection falls within a group-of box with matching_iou_threshold, + weight group_of_weight is added to true positives. Consequently, if no + detection falls within a group-of box, weight group_of_weight is added + to false negatives. + """ + + super(OpenImagesDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold, + evaluate_corlocs, + metric_prefix=metric_prefix, + group_of_weight=group_of_weight, + evaluate_masks=evaluate_masks) + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array + of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of + the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array + of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_group_of: Optional length M + numpy boolean array denoting whether a groundtruth box contains a group of instances. + Raises: + ValueError: On adding groundtruth for an image more than once. 
+ """ + if image_id in self._image_ids: + return + + gt_classes = (gt_dict[InputDataFields.gt_classes] - self._label_id_offset) + # If the key is not present in the gt_dict or the array is empty + # (unless there are no annotations for the groundtruth on this image) + # use values from the dictionary or insert None otherwise. + if (InputDataFields.gt_group_of in gt_dict and + (gt_dict[InputDataFields.gt_group_of].size or not gt_classes.size)): + gt_group_of = gt_dict[InputDataFields.gt_group_of] + else: + gt_group_of = None + # FIXME disable warning for now, will add group_of flag eventually + # if not len(self._image_ids) % 1000: + # logging.warning('image %s does not have groundtruth group_of flag specified', image_id) + if self._evaluate_masks: + gt_masks = gt_dict[InputDataFields.gt_instance_masks] + else: + gt_masks = None + + self._evaluation.add_single_ground_truth_image_info( + image_id, + gt_dict[InputDataFields.gt_boxes], + gt_classes, + gt_is_difficult_list=None, + gt_is_group_of_list=gt_group_of, + gt_masks=gt_masks) + self._image_ids.update([image_id]) + + +class OpenImagesChallengeEvaluator(OpenImagesDetectionEvaluator): + """A class implements Open Images Challenge metrics. + Both Detection and Instance Segmentation evaluation metrics are implemented. + Open Images Challenge Detection metric has two major changes in comparison + with Open Images V2 detection metric: + - a custom weight might be specified for detecting an object contained in a group-of box. + - verified image-level labels should be explicitly provided for evaluation: in case an + image has neither positive nor negative image level label of class c, all detections of + this class on this image will be ignored. + + Open Images Challenge Instance Segmentation metric allows to measure performance + of models in case of incomplete annotations: some instances are + annotations only on box level and some - on image-level. In addition, + image-level labels are taken into account as in detection metric. + + Open Images Challenge Detection metric default parameters: + evaluate_masks = False + group_of_weight = 1.0 + + Open Images Challenge Instance Segmentation metric default parameters: + evaluate_masks = True + (group_of_weight will not matter) + """ + + def __init__( + self, + categories, + evaluate_masks=False, + matching_iou_threshold=0.5, + evaluate_corlocs=False, + group_of_weight=1.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + evaluate_masks: set to true for instance segmentation metric and to false + for detection metric. + matching_iou_threshold: IOU threshold to use for matching groundtruth + boxes to detection boxes. + evaluate_corlocs: if True, additionally evaluates and returns CorLoc. + group_of_weight: Weight of group-of boxes. If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives. 
+ """ + if not evaluate_masks: + metrics_prefix = 'OpenImagesDetectionChallenge' + else: + metrics_prefix = 'OpenImagesInstanceSegmentationChallenge' + + super(OpenImagesChallengeEvaluator, self).__init__( + categories, + matching_iou_threshold, + evaluate_masks=evaluate_masks, + evaluate_corlocs=evaluate_corlocs, + group_of_weight=group_of_weight, + metric_prefix=metrics_prefix) + + self._evaluatable_labels = {} + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array of shape [num_boxes, 4] + containing `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax] + in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array of shape [num_boxes] + containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_image_classes: integer 1D + numpy array containing all classes for which labels are verified. + InputDataFields.gt_group_of: Optional length M + numpy boolean array denoting whether a groundtruth box contains a group of instances. + Raises: + ValueError: On adding groundtruth for an image more than once. + """ + super(OpenImagesChallengeEvaluator, + self).add_single_ground_truth_image_info(image_id, gt_dict) + input_fields = InputDataFields + gt_classes = gt_dict[input_fields.gt_classes] - self._label_id_offset + image_classes = np.array([], dtype=int) + if input_fields.gt_image_classes in gt_dict: + image_classes = gt_dict[input_fields.gt_image_classes] + elif input_fields.gt_labeled_classes in gt_dict: + image_classes = gt_dict[input_fields.gt_labeled_classes] + image_classes -= self._label_id_offset + self._evaluatable_labels[image_id] = np.unique( + np.concatenate((image_classes, gt_classes))) + + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_boxes: float32 numpy + array of shape [num_boxes, 4] containing `num_boxes` detection boxes + of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + DetectionResultFields.detection_scores: float32 numpy + array of shape [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy + array of shape [num_boxes] containing 1-indexed detection classes for + the boxes. + Raises: + ValueError: If detection masks are not in detections dictionary. + """ + if image_id not in self._image_ids: + # Since for the correct work of evaluator it is assumed that groundtruth + # is inserted first we make sure to break the code if is it not the case. 
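+            # No groundtruth or verified image-level labels are recorded for this image,
+            # so self._evaluatable_labels stays empty and every detection on it is
+            # filtered out by the isin() check below.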
+ self._image_ids.update([image_id]) + self._evaluatable_labels[image_id] = np.array([]) + + detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset + allowed_classes = np.where(np.isin(detection_classes, self._evaluatable_labels[image_id])) + detection_classes = detection_classes[allowed_classes] + detected_boxes = detections_dict[DetectionResultFields.detection_boxes][allowed_classes] + detected_scores = detections_dict[DetectionResultFields.detection_scores][allowed_classes] + + if self._evaluate_masks: + detection_masks = detections_dict[DetectionResultFields.detection_masks][allowed_classes] + else: + detection_masks = None + self._evaluation.add_single_detected_image_info( + image_key=image_id, + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_class_labels=detection_classes, + detected_masks=detection_masks) + + def clear(self): + """Clears stored data.""" + + super(OpenImagesChallengeEvaluator, self).clear() + self._evaluatable_labels.clear() + diff --git a/efficientdet/effdet/evaluation/fields.py b/efficientdet/effdet/evaluation/fields.py new file mode 100644 index 0000000000000000000000000000000000000000..d029b77dc5c4ec79aba1a6021760981fc23d3096 --- /dev/null +++ b/efficientdet/effdet/evaluation/fields.py @@ -0,0 +1,105 @@ + +class InputDataFields(object): + """Names for the input tensors. + Holds the standard data field names to use for identifying input tensors. This + should be used by the decoder to identify keys for the returned tensor_dict + containing input tensors. And it should be used by the model to identify the + tensors it needs. + Attributes: + image: image. + image_additional_channels: additional channels. + key: unique key corresponding to image. + filename: original filename of the dataset (without common path). + gt_image_classes: image-level class labels. + gt_image_confidences: image-level class confidences. + gt_labeled_classes: image-level annotation that indicates the + classes for which an image has been labeled. + gt_boxes: coordinates of the ground truth boxes in the image. + gt_classes: box-level class labels. + gt_confidences: box-level class confidences. The shape should be + the same as the shape of gt_classes. + gt_label_types: box-level label types (e.g. explicit negative). + gt_is_crowd: [DEPRECATED, use gt_group_of instead] + is the groundtruth a single object or a crowd. + gt_area: area of a groundtruth segment. + gt_difficult: is a `difficult` object + gt_group_of: is a `group_of` objects, e.g. multiple objects of the + same class, forming a connected group, where instances are heavily + occluding each other. + gt_instance_masks: ground truth instance masks. + gt_instance_boundaries: ground truth instance boundaries. + gt_instance_classes: instance mask-level class labels. + gt_label_weights: groundtruth label weights. + gt_weights: groundtruth weight factor for bounding boxes. 
+ image_height: height of images, used to decode + image_width: width of images, used to decode + """ + image = 'image' + key = 'image_id' + filename = 'filename' + gt_boxes = 'bbox' + gt_classes = 'cls' + gt_confidences = 'confidences' + gt_label_types = 'label_types' + gt_image_classes = 'img_cls' + gt_image_confidences = 'img_confidences' + gt_labeled_classes = 'labeled_cls' + gt_is_crowd = 'is_crowd' + gt_area = 'area' + gt_difficult = 'difficult' + gt_group_of = 'group_of' + gt_instance_masks = 'instance_masks' + gt_instance_boundaries = 'instance_boundaries' + gt_instance_classes = 'instance_classes' + image_height = 'img_height' + image_width = 'img_width' + image_size = 'img_size' + + +class DetectionResultFields(object): + """Naming conventions for storing the output of the detector. + Attributes: + source_id: source of the original image. + key: unique key corresponding to image. + detection_boxes: coordinates of the detection boxes in the image. + detection_scores: detection scores for the detection boxes in the image. + detection_multiclass_scores: class score distribution (including background) + for detection boxes in the image including background class. + detection_classes: detection-level class labels. + detection_masks: contains a segmentation mask for each detection box. + """ + + key = 'image_id' + detection_boxes = 'bbox' + detection_scores = 'score' + detection_classes = 'cls' + detection_masks = 'masks' + + +class BoxListFields(object): + """Naming conventions for BoxLists. + Attributes: + boxes: bounding box coordinates. + classes: classes per bounding box. + scores: scores per bounding box. + weights: sample weights per bounding box. + objectness: objectness score per bounding box. + masks: masks per bounding box. + boundaries: boundaries per bounding box. + keypoints: keypoints per bounding box. + keypoint_heatmaps: keypoint heatmaps per bounding box. + is_crowd: is_crowd annotation per bounding box. + """ + boxes = 'boxes' + classes = 'classes' + scores = 'scores' + weights = 'weights' + confidences = 'confidences' + objectness = 'objectness' + masks = 'masks' + boundaries = 'boundaries' + keypoints = 'keypoints' + keypoint_visibilities = 'keypoint_visibilities' + keypoint_heatmaps = 'keypoint_heatmaps' + is_crowd = 'is_crowd' + group_of = 'group_of' diff --git a/efficientdet/effdet/evaluation/metrics.py b/efficientdet/effdet/evaluation/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..01a73ef028e9e9da8d2fe5c51d5736ca958e938b --- /dev/null +++ b/efficientdet/effdet/evaluation/metrics.py @@ -0,0 +1,148 @@ +import numpy as np + + +def compute_precision_recall(scores, labels, num_gt): + """Compute precision and recall. + Args: + scores: A float numpy array representing detection score + labels: A float numpy array representing weighted true/false positive labels + num_gt: Number of ground truth instances + Raises: + ValueError: if the input is not of the correct format + Returns: + precision: Fraction of positive instances over detected ones. This value is + None if no ground truth labels are present. + recall: Fraction of detected positive instance over all positive instances. + This value is None if no ground truth labels are present. 
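+
+    Example (hypothetical values): scores=[0.9, 0.4], labels=[1., 0.], num_gt=2
+        yields precision=[1.0, 0.5] and recall=[0.5, 0.5].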
+ """ + if not isinstance(labels, np.ndarray) or len(labels.shape) != 1: + raise ValueError("labels must be single dimension numpy array") + + if labels.dtype != np.float and labels.dtype != np.bool: + raise ValueError("labels type must be either bool or float") + + if not isinstance(scores, np.ndarray) or len(scores.shape) != 1: + raise ValueError("scores must be single dimension numpy array") + + if num_gt < np.sum(labels): + raise ValueError("Number of true positives must be smaller than num_gt.") + + if len(scores) != len(labels): + raise ValueError("scores and labels must be of the same size.") + + if num_gt == 0: + return None, None + + sorted_indices = np.argsort(scores) + sorted_indices = sorted_indices[::-1] + true_positive_labels = labels[sorted_indices] + false_positive_labels = (true_positive_labels <= 0).astype(float) + cum_true_positives = np.cumsum(true_positive_labels) + cum_false_positives = np.cumsum(false_positive_labels) + precision = cum_true_positives.astype(float) / (cum_true_positives + cum_false_positives) + recall = cum_true_positives.astype(float) / num_gt + return precision, recall + + +def compute_average_precision(precision, recall): + """Compute Average Precision according to the definition in VOCdevkit. + Precision is modified to ensure that it does not decrease as recall + decrease. + Args: + precision: A float [N, 1] numpy array of precisions + recall: A float [N, 1] numpy array of recalls + Raises: + ValueError: if the input is not of the correct format + Returns: + average_precison: The area under the precision recall curve. NaN if + precision and recall are None. + """ + if precision is None: + if recall is not None: + raise ValueError("If precision is None, recall must also be None") + return np.NAN + + if not isinstance(precision, np.ndarray) or not isinstance(recall, np.ndarray): + raise ValueError("precision and recall must be numpy array") + if precision.dtype != np.float or recall.dtype != np.float: + raise ValueError("input must be float numpy array.") + if len(precision) != len(recall): + raise ValueError("precision and recall must be of the same size.") + if not precision.size: + return 0.0 + if np.amin(precision) < 0 or np.amax(precision) > 1: + raise ValueError("Precision must be in the range of [0, 1].") + if np.amin(recall) < 0 or np.amax(recall) > 1: + raise ValueError("recall must be in the range of [0, 1].") + if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): + raise ValueError("recall must be a non-decreasing array") + + recall = np.concatenate([[0], recall, [1]]) + precision = np.concatenate([[0], precision, [0]]) + + # Preprocess precision to be a non-decreasing array + for i in range(len(precision) - 2, -1, -1): + precision[i] = np.maximum(precision[i], precision[i + 1]) + + indices = np.where(recall[1:] != recall[:-1])[0] + 1 + average_precision = np.sum((recall[indices] - recall[indices - 1]) * precision[indices]) + return average_precision + + +def compute_cor_loc(num_gt_imgs_per_class, num_images_correctly_detected_per_class): + """Compute CorLoc according to the definition in the following paper. + https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf + Returns nans if there are no ground truth images for a class. 
+ Args: + num_gt_imgs_per_class: 1D array, representing number of images containing + at least one object instance of a particular class + num_images_correctly_detected_per_class: 1D array, representing number of + images that are correctly detected at least one object instance of a particular class + Returns: + corloc_per_class: A float numpy array represents the corloc score of each class + """ + return np.where( + num_gt_imgs_per_class == 0, np.nan, + num_images_correctly_detected_per_class / num_gt_imgs_per_class) + + +def compute_median_rank_at_k(tp_fp_list, k): + """Computes MedianRank@k, where k is the top-scoring labels. + Args: + tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all + detection on a single image, where the detections are sorted by score in + descending order. Further, each numpy array element can have boolean or + float values. True positive elements have either value >0.0 or True; + any other value is considered false positive. + k: number of top-scoring proposals to take. + Returns: + median_rank: median rank of all true positive proposals among top k by score. + """ + ranks = [] + for i in range(len(tp_fp_list)): + ranks.append(np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0]) + concatenated_ranks = np.concatenate(ranks) + return np.median(concatenated_ranks) + + +def compute_recall_at_k(tp_fp_list, num_gt, k): + """Computes Recall@k, MedianRank@k, where k is the top-scoring labels. + Args: + tp_fp_list: a list of numpy arrays; each numpy array corresponds to the all + detection on a single image, where the detections are sorted by score in + descending order. Further, each numpy array element can have boolean or + float values. True positive elements have either value >0.0 or True; + any other value is considered false positive. + num_gt: number of groundtruth anotations. + k: number of top-scoring proposals to take. + Returns: + recall: recall evaluated on the top k by score detections. + """ + + tp_fp_eval = [] + for i in range(len(tp_fp_list)): + tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])]) + + tp_fp_eval = np.concatenate(tp_fp_eval) + + return np.sum(tp_fp_eval) / num_gt diff --git a/efficientdet/effdet/evaluation/np_box_list.py b/efficientdet/effdet/evaluation/np_box_list.py new file mode 100644 index 0000000000000000000000000000000000000000..60e3b44c32a72f088ca3b736d24a62aac252d795 --- /dev/null +++ b/efficientdet/effdet/evaluation/np_box_list.py @@ -0,0 +1,696 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Bounding Box List operations for Numpy BoxLists. + +Example box operations that are supported: + * Areas: compute bounding box areas + * IOU: pairwise intersection-over-union scores +""" +import numpy as np + + +class BoxList(object): + """Box collection. 
+ BoxList represents a list of bounding boxes as numpy array, where each + bounding box is represented as a row of 4 numbers, + [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a + given list correspond to a single image. + Optionally, users can add additional related fields (such as + objectness/classification scores). + """ + + def __init__(self, data): + """Constructs box collection. + Args: + data: a numpy array of shape [N, 4] representing box coordinates + Raises: + ValueError: if bbox data is not a numpy array + ValueError: if invalid dimensions for bbox data + """ + if not isinstance(data, np.ndarray): + raise ValueError('data must be a numpy array.') + if len(data.shape) != 2 or data.shape[1] != 4: + raise ValueError('Invalid dimensions for box data.') + if data.dtype != np.float32 and data.dtype != np.float64: + raise ValueError('Invalid data type for box data: float is required.') + if not self._is_valid_boxes(data): + raise ValueError('Invalid box data. data must be a numpy array of ' + 'N*[y_min, x_min, y_max, x_max]') + self.data = {'boxes': data} + + def num_boxes(self): + """Return number of boxes held in collections.""" + return self.data['boxes'].shape[0] + + def get_extra_fields(self): + """Return all non-box fields.""" + return [k for k in self.data.keys() if k != 'boxes'] + + def has_field(self, field): + return field in self.data + + def add_field(self, field, field_data): + """Add data to a specified field. + Args: + field: a string parameter used to speficy a related field to be accessed. + field_data: a numpy array of [N, ...] representing the data associated + with the field. + Raises: + ValueError: if the field is already exist or the dimension of the field + data does not matches the number of boxes. + """ + if self.has_field(field): + raise ValueError('Field ' + field + 'already exists') + if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): + raise ValueError('Invalid dimensions for field data') + self.data[field] = field_data + + def get(self): + """Convenience function for accesssing box coordinates. + Returns: + a numpy array of shape [N, 4] representing box corners + """ + return self.get_field('boxes') + + def get_field(self, field): + """Accesses data associated with the specified field in the box collection. + Args: + field: a string parameter used to speficy a related field to be accessed. + Returns: + a numpy 1-d array representing data of an associated field + Raises: + ValueError: if invalid field + """ + if not self.has_field(field): + raise ValueError('field {} does not exist'.format(field)) + return self.data[field] + + def get_coordinates(self): + """Get corner coordinates of boxes. + Returns: + a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] + """ + box_coordinates = self.get() + y_min = box_coordinates[:, 0] + x_min = box_coordinates[:, 1] + y_max = box_coordinates[:, 2] + x_max = box_coordinates[:, 3] + return [y_min, x_min, y_max, x_max] + + def _is_valid_boxes(self, data): + """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. + Args: + data: a numpy array of shape [N, 4] representing box coordinates + Returns: + a boolean indicating whether all ymax of boxes are equal or greater than + ymin, and all xmax of boxes are equal or greater than xmin. + """ + if data.shape[0] > 0: + for i in range(data.shape[0]): + if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: + return False + return True + + +def area(boxes): + """Computes area of boxes. 
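A minimal sketch of constructing a BoxList and attaching an extra field; the coordinates are invented and the snippet is not part of the upstream file.

import numpy as np

# Rows are [y_min, x_min, y_max, x_max]; float32 or float64 is required.
boxes = np.array([[0.0, 0.0, 1.0, 1.0],
                  [0.5, 0.5, 2.0, 2.0]], dtype=np.float32)
boxlist = BoxList(boxes)
boxlist.add_field('scores', np.array([0.9, 0.4]))
print(boxlist.num_boxes(), boxlist.get_extra_fields())   # 2 ['scores']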
+ + Args: + boxes: Numpy array with shape [N, 4] holding N boxes + + Returns: + a numpy array with shape [N*1] representing box areas + """ + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +def intersection(boxes1, boxes2): + """Compute pairwise intersection areas between boxes. + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes + boxes2: a numpy array with shape [M, 4] holding M boxes + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area + """ + [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) + [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) + + all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) + all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) + intersect_heights = np.maximum(np.zeros(all_pairs_max_ymin.shape), all_pairs_min_ymax - all_pairs_max_ymin) + all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) + all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) + intersect_widths = np.maximum(np.zeros(all_pairs_max_xmin.shape), all_pairs_min_xmax - all_pairs_max_xmin) + return intersect_heights * intersect_widths + + +def iou(boxes1, boxes2): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes. + boxes2: a numpy array with shape [M, 4] holding N boxes. + + Returns: + a numpy array with shape [N, M] representing pairwise iou scores. + """ + intersect = intersection(boxes1, boxes2) + area1 = area(boxes1) + area2 = area(boxes2) + union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect + return intersect / union + + +def ioa(boxes1, boxes2): + """Computes pairwise intersection-over-area between box collections. + + Intersection-over-area (ioa) between two boxes box1 and box2 is defined as + their intersection area over box2's area. Note that ioa is not symmetric, + that is, IOA(box1, box2) != IOA(box2, box1). + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes. + boxes2: a numpy array with shape [M, 4] holding N boxes. + + Returns: + a numpy array with shape [N, M] representing pairwise ioa scores. + """ + intersect = intersection(boxes1, boxes2) + areas = np.expand_dims(area(boxes2), axis=0) + return intersect / areas + + +class SortOrder(object): + """Enum class for sort order. + + Attributes: + ascend: ascend order. + descend: descend order. + """ + ASCEND = 1 + DESCEND = 2 + + +def area_boxlist(boxlist): + """Computes area of boxes. + + Args: + boxlist: BoxList holding N boxes + + Returns: + a numpy array with shape [N*1] representing box areas + """ + y_min, x_min, y_max, x_max = boxlist.get_coordinates() + return (y_max - y_min) * (x_max - x_min) + + +def intersection_boxlist(boxlist1, boxlist2): + """Compute pairwise intersection areas between boxes. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area + """ + return intersection(boxlist1.get(), boxlist2.get()) + + +def iou_boxlist(boxlist1, boxlist2): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a numpy array with shape [N, M] representing pairwise iou scores. + """ + return iou(boxlist1.get(), boxlist2.get()) + + +def ioa_boxlist(boxlist1, boxlist2): + """Computes pairwise intersection-over-area between box collections. 
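The free functions above operate directly on [N, 4] arrays rather than BoxList objects; a quick sketch with invented boxes, not part of the upstream file.

import numpy as np

query = np.array([[0.0, 0.0, 2.0, 2.0]], dtype=np.float32)
others = np.array([[1.0, 1.0, 3.0, 3.0],
                   [0.0, 0.0, 2.0, 2.0]], dtype=np.float32)
print(iou(query, others))   # [[~0.14, 1.0]]: intersection 1 over union 7, then an identical box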
+ + Intersection-over-area (ioa) between two boxes box1 and box2 is defined as + their intersection area over box2's area. Note that ioa is not symmetric, + that is, IOA(box1, box2) != IOA(box2, box1). + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a numpy array with shape [N, M] representing pairwise ioa scores. + """ + return ioa(boxlist1.get(), boxlist2.get()) + + +def gather_boxlist(boxlist, indices, fields=None): + """Gather boxes from BoxList according to indices and return new BoxList. + + By default, gather returns boxes corresponding to the input index list, as + well as all additional fields stored in the boxlist (indexing into the + first dimension). However one can optionally only gather from a + subset of fields. + + Args: + boxlist: BoxList holding N boxes + indices: a 1-d numpy array of type int_ + fields: (optional) list of fields to also gather from. If None (default), + all fields are gathered from. Pass an empty fields list to only gather the box coordinates. + + Returns: + subboxlist: a BoxList corresponding to the subset of the input BoxList specified by indices + + Raises: + ValueError: if specified field is not contained in boxlist or if the indices are not of type int_ + """ + if indices.size: + if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0: + raise ValueError('indices are out of valid range.') + subboxlist = BoxList(boxlist.get()[indices, :]) + if fields is None: + fields = boxlist.get_extra_fields() + for field in fields: + extra_field_data = boxlist.get_field(field) + subboxlist.add_field(field, extra_field_data[indices, ...]) + return subboxlist + + +def sort_by_field_boxlist(boxlist, field, order=SortOrder.DESCEND): + """Sort boxes and associated fields according to a scalar field. + + A common use case is reordering the boxes according to descending scores. + + Args: + boxlist: BoxList holding N boxes. + field: A BoxList field for sorting and reordering the BoxList. + order: (Optional) 'descend' or 'ascend'. Default is descend. + + Returns: + sorted_boxlist: A sorted BoxList with the field in the specified order. + + Raises: + ValueError: if specified field does not exist or is not of single dimension. + ValueError: if the order is not either descend or ascend. + """ + if not boxlist.has_field(field): + raise ValueError('Field ' + field + ' does not exist') + if len(boxlist.get_field(field).shape) != 1: + raise ValueError('Field ' + field + 'should be single dimension.') + if order != SortOrder.DESCEND and order != SortOrder.ASCEND: + raise ValueError('Invalid sort order') + + field_to_sort = boxlist.get_field(field) + sorted_indices = np.argsort(field_to_sort) + if order == SortOrder.DESCEND: + sorted_indices = sorted_indices[::-1] + return gather_boxlist(boxlist, sorted_indices) + + +def non_max_suppression(boxlist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0): + """Non maximum suppression. + + This op greedily selects a subset of detection bounding boxes, pruning + away boxes that have high IOU (intersection over union) overlap (> thresh) + with already selected boxes. In each iteration, the detected bounding box with + highest score in the available pool is selected. + + Args: + boxlist: BoxList holding N boxes. Must contain a 'scores' field + representing detection scores. All scores belong to the same class. + max_output_size: maximum number of retained boxes + iou_threshold: intersection over union threshold. + score_threshold: minimum score threshold. 
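gather_boxlist and sort_by_field_boxlist are typically combined to take the top-scoring boxes; a sketch with invented data, not part of the upstream file.

import numpy as np

boxlist = BoxList(np.array([[0., 0., 1., 1.],
                            [0., 0., 2., 2.]], dtype=np.float32))
boxlist.add_field('scores', np.array([0.2, 0.9]))
top1 = gather_boxlist(sort_by_field_boxlist(boxlist, 'scores'), np.array([0]))
print(top1.get_field('scores'))   # [0.9]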
Remove the boxes with scores less than + this value. Default value is set to -10. A very low threshold to pass pretty + much all the boxes, unless the user sets a different score threshold. + + Returns: + a BoxList holding M boxes where M <= max_output_size + Raises: + ValueError: if 'scores' field does not exist + ValueError: if threshold is not in [0, 1] + ValueError: if max_output_size < 0 + """ + if not boxlist.has_field('scores'): + raise ValueError('Field scores does not exist') + if iou_threshold < 0. or iou_threshold > 1.0: + raise ValueError('IOU threshold must be in [0, 1]') + if max_output_size < 0: + raise ValueError('max_output_size must be bigger than 0.') + + boxlist = filter_scores_greater_than(boxlist, score_threshold) + if boxlist.num_boxes() == 0: + return boxlist + + boxlist = sort_by_field_boxlist(boxlist, 'scores') + + # Prevent further computation if NMS is disabled. + if iou_threshold == 1.0: + if boxlist.num_boxes() > max_output_size: + selected_indices = np.arange(max_output_size) + return gather_boxlist(boxlist, selected_indices) + else: + return boxlist + + boxes = boxlist.get() + num_boxes = boxlist.num_boxes() + # is_index_valid is True only for all remaining valid boxes, + is_index_valid = np.full(num_boxes, 1, dtype=bool) + selected_indices = [] + num_output = 0 + for i in range(num_boxes): + if num_output < max_output_size: + if is_index_valid[i]: + num_output += 1 + selected_indices.append(i) + is_index_valid[i] = False + valid_indices = np.where(is_index_valid)[0] + if valid_indices.size == 0: + break + + intersect_over_union = iou(np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :]) + intersect_over_union = np.squeeze(intersect_over_union, axis=0) + is_index_valid[valid_indices] = np.logical_and( + is_index_valid[valid_indices], + intersect_over_union <= iou_threshold) + return gather_boxlist(boxlist, np.array(selected_indices)) + + +def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh, max_output_size): + """Multi-class version of non maximum suppression. + + This op greedily selects a subset of detection bounding boxes, pruning + away boxes that have high IOU (intersection over union) overlap (> thresh) + with already selected boxes. It operates independently for each class for + which scores are provided (via the scores field of the input box_list), + pruning boxes with score less than a provided threshold prior to + applying NMS. + + Args: + boxlist: BoxList holding N boxes. Must contain a 'scores' field + representing detection scores. This scores field is a tensor that can + be 1 dimensional (in the case of a single class) or 2-dimensional, which + which case we assume that it takes the shape [num_boxes, num_classes]. + We further assume that this rank is known statically and that + scores.shape[1] is also known (i.e., the number of classes is fixed + and known at graph construction time). + score_thresh: scalar threshold for score (low scoring boxes are removed). + iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap + with previously selected boxes are removed). + max_output_size: maximum number of retained boxes per class. + + Returns: + a BoxList holding M boxes with a rank-1 scores field representing + corresponding scores for each box with scores sorted in decreasing order + and a rank-1 classes field representing a class label for each box. + Raises: + ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have + a valid scores field. 
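A sketch of class-agnostic NMS with the function above; the two heavily overlapping boxes collapse to the higher-scoring one. The data is invented and the snippet is not part of the upstream file.

import numpy as np

boxlist = BoxList(np.array([[0.0, 0.0, 2.0, 2.0],
                            [0.1, 0.1, 2.0, 2.0],
                            [5.0, 5.0, 6.0, 6.0]], dtype=np.float32))
boxlist.add_field('scores', np.array([0.9, 0.8, 0.7]))
kept = non_max_suppression(boxlist, max_output_size=10, iou_threshold=0.5, score_threshold=0.0)
print(kept.num_boxes())   # expected 2: one of the overlapping pair plus the distant box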
+ """ + if not 0 <= iou_thresh <= 1.0: + raise ValueError('thresh must be between 0 and 1') + if not isinstance(boxlist, BoxList): + raise ValueError('boxlist must be a BoxList') + if not boxlist.has_field('scores'): + raise ValueError('input boxlist must have \'scores\' field') + scores = boxlist.get_field('scores') + if len(scores.shape) == 1: + scores = np.reshape(scores, [-1, 1]) + elif len(scores.shape) == 2: + if scores.shape[1] is None: + raise ValueError('scores field must have statically defined second dimension') + else: + raise ValueError('scores field must be of rank 1 or 2') + num_boxes = boxlist.num_boxes() + num_scores = scores.shape[0] + num_classes = scores.shape[1] + + if num_boxes != num_scores: + raise ValueError('Incorrect scores field length: actual vs expected.') + + selected_boxes_list = [] + for class_idx in range(num_classes): + boxlist_and_class_scores = BoxList(boxlist.get()) + class_scores = np.reshape(scores[0:num_scores, class_idx], [-1]) + boxlist_and_class_scores.add_field('scores', class_scores) + boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores, score_thresh) + nms_result = non_max_suppression( + boxlist_filt, max_output_size=max_output_size, iou_threshold=iou_thresh, score_threshold=score_thresh) + nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx) + selected_boxes_list.append(nms_result) + selected_boxes = concatenate_boxlist(selected_boxes_list) + sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores') + return sorted_boxes + + +def scale(boxlist, y_scale, x_scale): + """Scale box coordinates in x and y dimensions. + + Args: + boxlist: BoxList holding N boxes + y_scale: float + x_scale: float + + Returns: + boxlist: BoxList holding N boxes + """ + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + y_min = y_scale * y_min + y_max = y_scale * y_max + x_min = x_scale * x_min + x_max = x_scale * x_max + scaled_boxlist = BoxList(np.hstack([y_min, x_min, y_max, x_max])) + + fields = boxlist.get_extra_fields() + for field in fields: + extra_field_data = boxlist.get_field(field) + scaled_boxlist.add_field(field, extra_field_data) + + return scaled_boxlist + + +def clip_to_window(boxlist, window, filter_nonoverlapping=True): + """Clip bounding boxes to a window. + + This op clips input bounding boxes (represented by bounding box + corners) to a window, optionally filtering out boxes that do not + overlap at all with the window. + + Args: + boxlist: BoxList holding M_in boxes + window: a numpy array of shape [4] representing the [y_min, x_min, y_max, x_max] + window to which the op should clip boxes. + filter_nonoverlapping: whether to filter out boxes that do not overlap at all with the window. 
+ + Returns: + a BoxList holding M_out boxes where M_out <= M_in + """ + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + win_y_min = window[0] + win_x_min = window[1] + win_y_max = window[2] + win_x_max = window[3] + y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min) + y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min) + x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min) + x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min) + clipped = BoxList(np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped])) + clipped = _copy_extra_fields(clipped, boxlist) + if filter_nonoverlapping: + areas = area(clipped) + nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)), [-1]).astype(np.int32) + clipped = gather_boxlist(clipped, nonzero_area_indices) + return clipped + + +def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0): + """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. + + For each box in boxlist1, we want its IOA to be more than minoverlap with + at least one of the boxes in boxlist2. If it does not, we remove it. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + minoverlap: Minimum required overlap between boxes, to count them as overlapping. + + Returns: + A pruned boxlist with size [N', 4]. + """ + intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor + intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor + keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap)) + keep_inds = np.nonzero(keep_bool)[0] + new_boxlist1 = gather_boxlist(boxlist1, keep_inds) + return new_boxlist1 + + +def prune_outside_window(boxlist, window): + """Prunes bounding boxes that fall outside a given window. + + This function prunes bounding boxes that even partially fall outside the given + window. See also ClipToWindow which only prunes bounding boxes that fall + completely outside the window, and clips any bounding boxes that partially + overflow. + + Args: + boxlist: a BoxList holding M_in boxes. + window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax] of the window. + + Returns: + pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in. + valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes in the input tensor. + """ + + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + win_y_min = window[0] + win_x_min = window[1] + win_y_max = window[2] + win_x_max = window[3] + coordinate_violations = np.hstack([ + np.less(y_min, win_y_min), np.less(x_min, win_x_min), + np.greater(y_max, win_y_max), np.greater(x_max, win_x_max)]) + valid_indices = np.reshape(np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1]) + return gather_boxlist(boxlist, valid_indices), valid_indices + + +def concatenate_boxlist(boxlists, fields=None): + """Concatenate list of BoxLists. + + This op concatenates a list of input BoxLists into a larger BoxList. It also + handles concatenation of BoxList fields as long as the field tensor shapes + are equal except for the first dimension. + + Args: + boxlists: list of BoxList objects + fields: optional list of fields to also concatenate. By default, all + fields from the first BoxList in the list are included in the concatenation. 
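A sketch of clip_to_window with invented coordinates, not part of the upstream file. Note that, as transcribed, the filter_nonoverlapping branch passes a BoxList to the array-level area(), where area_boxlist() appears to be intended, so the sketch disables that branch.

import numpy as np

boxlist = BoxList(np.array([[-1.0, -1.0, 0.5, 0.5]], dtype=np.float32))
window = np.array([0.0, 0.0, 2.0, 2.0])
clipped = clip_to_window(boxlist, window, filter_nonoverlapping=False)
print(clipped.get())   # [[0. 0. 0.5 0.5]]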
+ + Returns: + a BoxList with number of boxes equal to + sum([boxlist.num_boxes() for boxlist in BoxList]) + Raises: + ValueError: if boxlists is invalid (i.e., is not a list, is empty, or + contains non BoxList objects), or if requested fields are not contained in all boxlists + """ + if not isinstance(boxlists, list): + raise ValueError('boxlists should be a list') + if not boxlists: + raise ValueError('boxlists should have nonzero length') + for boxlist in boxlists: + if not isinstance(boxlist, BoxList): + raise ValueError('all elements of boxlists should be BoxList objects') + concatenated = BoxList(np.vstack([boxlist.get() for boxlist in boxlists])) + if fields is None: + fields = boxlists[0].get_extra_fields() + for field in fields: + first_field_shape = boxlists[0].get_field(field).shape + first_field_shape = first_field_shape[1:] + for boxlist in boxlists: + if not boxlist.has_field(field): + raise ValueError('boxlist must contain all requested fields') + field_shape = boxlist.get_field(field).shape + field_shape = field_shape[1:] + if field_shape != first_field_shape: + raise ValueError('field %s must have same shape for all boxlists ' + 'except for the 0th dimension.' % field) + concatenated_field = np.concatenate([boxlist.get_field(field) for boxlist in boxlists], axis=0) + concatenated.add_field(field, concatenated_field) + return concatenated + + +def filter_scores_greater_than(boxlist, thresh): + """Filter to keep only boxes with score exceeding a given threshold. + + This op keeps the collection of boxes whose corresponding scores are + greater than the input threshold. + + Args: + boxlist: BoxList holding N boxes. Must contain a 'scores' field representing detection scores. + thresh: scalar threshold + + Returns: + a BoxList holding M boxes where M <= N + + Raises: + ValueError: if boxlist not a BoxList object or if it does not have a scores field + """ + if not isinstance(boxlist, BoxList): + raise ValueError('boxlist must be a BoxList') + if not boxlist.has_field('scores'): + raise ValueError('input boxlist must have \'scores\' field') + scores = boxlist.get_field('scores') + if len(scores.shape) > 2: + raise ValueError('Scores should have rank 1 or 2') + if len(scores.shape) == 2 and scores.shape[1] != 1: + raise ValueError('Scores should have rank 1 or have shape ' + 'consistent with [None, 1]') + high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32) + return gather_boxlist(boxlist, high_score_indices) + + +def change_coordinate_frame(boxlist, window): + """Change coordinate frame of the boxlist to be relative to window's frame. + + Given a window of the form [ymin, xmin, ymax, xmax], + changes bounding box coordinates from boxlist to be relative to this window + (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). + + An example use case is data augmentation: where we are given groundtruth + boxes (boxlist) and would like to randomly crop the image to some + window (window). In this case we need to change the coordinate frame of + each groundtruth box to be relative to this new window. + + Args: + boxlist: A BoxList object holding N boxes. + window: a size 4 1-D numpy array. + + Returns: + Returns a BoxList object with N boxes. 
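concatenate_boxlist and filter_scores_greater_than are often used together when merging per-class or per-image results; a sketch with invented data, not part of the upstream file.

import numpy as np

a = BoxList(np.array([[0., 0., 1., 1.]], dtype=np.float32))
a.add_field('scores', np.array([0.3]))
b = BoxList(np.array([[1., 1., 2., 2.]], dtype=np.float32))
b.add_field('scores', np.array([0.8]))
merged = concatenate_boxlist([a, b])
kept = filter_scores_greater_than(merged, thresh=0.5)
print(kept.get_field('scores'))   # [0.8]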
+ """ + win_height = window[2] - window[0] + win_width = window[3] - window[1] + boxlist_new = scale( + BoxList(boxlist.get() - [window[0], window[1], window[0], window[1]]), 1.0 / win_height, 1.0 / win_width) + _copy_extra_fields(boxlist_new, boxlist) + + return boxlist_new + + +def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): + """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to. + + Args: + boxlist_to_copy_to: BoxList to which extra fields are copied. + boxlist_to_copy_from: BoxList from which fields are copied. + + Returns: + boxlist_to_copy_to with extra fields. + """ + for field in boxlist_to_copy_from.get_extra_fields(): + boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field)) + return boxlist_to_copy_to + + +def _update_valid_indices_by_removing_high_iou_boxes( + selected_indices, is_index_valid, intersect_over_union, threshold): + max_iou = np.max(intersect_over_union[:, selected_indices], axis=1) + return np.logical_and(is_index_valid, max_iou <= threshold) diff --git a/efficientdet/effdet/evaluation/np_mask_list.py b/efficientdet/effdet/evaluation/np_mask_list.py new file mode 100644 index 0000000000000000000000000000000000000000..22cdb8770ffb6ce1e4f0233ca814273ca29bbf8f --- /dev/null +++ b/efficientdet/effdet/evaluation/np_mask_list.py @@ -0,0 +1,478 @@ +import numpy as np +from .np_box_list import * + +EPSILON = 1e-7 + + +class MaskList(BoxList): + """Convenience wrapper for BoxList with masks. + + BoxMaskList extends the np_box_list.BoxList to contain masks as well. + In particular, its constructor receives both boxes and masks. Note that the + masks correspond to the full image. + """ + + def __init__(self, box_data, mask_data): + """Constructs box collection. + + Args: + box_data: a numpy array of shape [N, 4] representing box coordinates + mask_data: a numpy array of shape [N, height, width] representing masks + with values are in {0,1}. The masks correspond to the full + image. The height and the width will be equal to image height and width. + + Raises: + ValueError: if bbox data is not a numpy array + ValueError: if invalid dimensions for bbox data + ValueError: if mask data is not a numpy array + ValueError: if invalid dimension for mask data + """ + super(MaskList, self).__init__(box_data) + if not isinstance(mask_data, np.ndarray): + raise ValueError('Mask data must be a numpy array.') + if len(mask_data.shape) != 3: + raise ValueError('Invalid dimensions for mask data.') + if mask_data.dtype != np.uint8: + raise ValueError('Invalid data type for mask data: uint8 is required.') + if mask_data.shape[0] != box_data.shape[0]: + raise ValueError('There should be the same number of boxes and masks.') + self.data['masks'] = mask_data + + def get_masks(self): + """Convenience function for accessing masks. + + Returns: + a numpy array of shape [N, height, width] representing masks + """ + return self.get_field('masks') + + +def boxlist_to_masklist(boxlist): + """Converts a BoxList containing 'masks' into a BoxMaskList. + + Args: + boxlist: An np_box_list.BoxList object. + + Returns: + An BoxMaskList object. + + Raises: + ValueError: If boxlist does not contain `masks` as a field. 
+ """ + if not boxlist.has_field('masks'): + raise ValueError('boxlist does not contain mask field.') + masklist = MaskList(box_data=boxlist.get(), mask_data=boxlist.get_field('masks')) + extra_fields = boxlist.get_extra_fields() + for key in extra_fields: + if key != 'masks': + masklist.data[key] = boxlist.get_field(key) + return masklist + + +def area_mask(masks): + """Computes area of masks. + + Args: + masks: Numpy array with shape [N, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + + Returns: + a numpy array with shape [N*1] representing mask areas. + + Raises: + ValueError: If masks.dtype is not np.uint8 + """ + if masks.dtype != np.uint8: + raise ValueError('Masks type should be np.uint8') + return np.sum(masks, axis=(1, 2), dtype=np.float32) + + +def intersection_mask(masks1, masks2): + """Compute pairwise intersection areas between masks. + + Args: + masks1: a numpy array with shape [N, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + masks2: a numpy array with shape [M, height, width] holding M masks. Masks + values are of type np.uint8 and values are in {0,1}. + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area. + + Raises: + ValueError: If masks1 and masks2 are not of type np.uint8. + """ + if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: + raise ValueError('masks1 and masks2 should be of type np.uint8') + n = masks1.shape[0] + m = masks2.shape[0] + answer = np.zeros([n, m], dtype=np.float32) + for i in np.arange(n): + for j in np.arange(m): + answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32) + return answer + + +def iou_mask(masks1, masks2): + """Computes pairwise intersection-over-union between mask collections. + + Args: + masks1: a numpy array with shape [N, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + masks2: a numpy array with shape [M, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + + Returns: + a numpy array with shape [N, M] representing pairwise iou scores. + + Raises: + ValueError: If masks1 and masks2 are not of type np.uint8. + """ + if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: + raise ValueError('masks1 and masks2 should be of type np.uint8') + intersect = intersection(masks1, masks2) + area1 = area(masks1) + area2 = area(masks2) + union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect + return intersect / np.maximum(union, EPSILON) + + +def ioa_mask(masks1, masks2): + """Computes pairwise intersection-over-area between box collections. + + Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as + their intersection area over mask2's area. Note that ioa is not symmetric, + that is, IOA(mask1, mask2) != IOA(mask2, mask1). + + Args: + masks1: a numpy array with shape [N, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + masks2: a numpy array with shape [M, height, width] holding N masks. Masks + values are of type np.uint8 and values are in {0,1}. + + Returns: + a numpy array with shape [N, M] representing pairwise ioa scores. + + Raises: + ValueError: If masks1 and masks2 are not of type np.uint8. 
+ """ + if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: + raise ValueError('masks1 and masks2 should be of type np.uint8') + intersect = intersection(masks1, masks2) + areas = np.expand_dims(area(masks2), axis=0) + return intersect / (areas + EPSILON) + + +def area_masklist(masklist): + """Computes area of masks. + + Args: + masklist: BoxMaskList holding N boxes and masks + + Returns: + a numpy array with shape [N*1] representing mask areas + """ + return area_mask(masklist.get_masks()) + + +def intersection_masklist(masklist1, masklist2): + """Compute pairwise intersection areas between masks. + + Args: + masklist1: BoxMaskList holding N boxes and masks + masklist2: BoxMaskList holding M boxes and masks + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area + """ + return intersection_mask(masklist1.get_masks(), masklist2.get_masks()) + + +def iou_masklist(masklist1, masklist2): + """Computes pairwise intersection-over-union between box and mask collections. + + Args: + masklist1: BoxMaskList holding N boxes and masks + masklist2: BoxMaskList holding M boxes and masks + + Returns: + a numpy array with shape [N, M] representing pairwise iou scores. + """ + return iou_mask(masklist1.get_masks(), masklist2.get_masks()) + + +def ioa_masklist(masklist1, masklist2): + """Computes pairwise intersection-over-area between box and mask collections. + + Intersection-over-area (ioa) between two masks mask1 and mask2 is defined as + their intersection area over mask2's area. Note that ioa is not symmetric, + that is, IOA(mask1, mask2) != IOA(mask2, mask1). + + Args: + masklist1: BoxMaskList holding N boxes and masks + masklist2: BoxMaskList holding M boxes and masks + + Returns: + a numpy array with shape [N, M] representing pairwise ioa scores. + """ + return ioa_mask(masklist1.get_masks(), masklist2.get_masks()) + + +def gather_masklist(masklist, indices, fields=None): + """Gather boxes from BoxMaskList according to indices. + + By default, gather returns boxes corresponding to the input index list, as + well as all additional fields stored in the masklist (indexing into the + first dimension). However one can optionally only gather from a + subset of fields. + + Args: + masklist: BoxMaskList holding N boxes + indices: a 1-d numpy array of type int_ + fields: (optional) list of fields to also gather from. If None (default), all fields + are gathered from. Pass an empty fields list to only gather the box coordinates. + + Returns: + submasklist: a BoxMaskList corresponding to the subset of the input masklist specified by indices + + Raises: + ValueError: if specified field is not contained in masklist or if the indices are not of type int_ + """ + if fields is not None: + if 'masks' not in fields: + fields.append('masks') + return boxlist_to_masklist(gather_boxlist(boxlist=masklist, indices=indices, fields=fields)) + + +def sort_by_field_masklist(masklist, field, order=SortOrder.DESCEND): + """Sort boxes and associated fields according to a scalar field. + + A common use case is reordering the boxes according to descending scores. + + Args: + masklist: BoxMaskList holding N boxes. + field: A BoxMaskList field for sorting and reordering the BoxMaskList. + order: (Optional) 'descend' or 'ascend'. Default is descend. + + Returns: + sorted_masklist: A sorted BoxMaskList with the field in the specified order. 
+ """ + return boxlist_to_masklist(sort_by_field_boxlist(boxlist=masklist, field=field, order=order)) + + +def non_max_suppression_mask(masklist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0): + """Non maximum suppression. + + This op greedily selects a subset of detection bounding boxes, pruning + away boxes that have high IOU (intersection over union) overlap (> thresh) + with already selected boxes. In each iteration, the detected bounding box with + highest score in the available pool is selected. + + Args: + masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing + detection scores. All scores belong to the same class. + max_output_size: maximum number of retained boxes + iou_threshold: intersection over union threshold. + score_threshold: minimum score threshold. Remove the boxes with scores + less than this value. Default value is set to -10. A very + low threshold to pass pretty much all the boxes, unless + the user sets a different score threshold. + + Returns: + an BoxMaskList holding M boxes where M <= max_output_size + + Raises: + ValueError: if 'scores' field does not exist + ValueError: if threshold is not in [0, 1] + ValueError: if max_output_size < 0 + """ + if not masklist.has_field('scores'): + raise ValueError('Field scores does not exist') + if iou_threshold < 0. or iou_threshold > 1.0: + raise ValueError('IOU threshold must be in [0, 1]') + if max_output_size < 0: + raise ValueError('max_output_size must be bigger than 0.') + + masklist = filter_scores_greater_than(masklist, score_threshold) + if masklist.num_boxes() == 0: + return masklist + + masklist = sort_by_field_boxlist(masklist, 'scores') + + # Prevent further computation if NMS is disabled. + if iou_threshold == 1.0: + if masklist.num_boxes() > max_output_size: + selected_indices = np.arange(max_output_size) + return gather_masklist(masklist, selected_indices) + else: + return masklist + + masks = masklist.get_masks() + num_masks = masklist.num_boxes() + + # is_index_valid is True only for all remaining valid boxes, + is_index_valid = np.full(num_masks, 1, dtype=bool) + selected_indices = [] + num_output = 0 + for i in range(num_masks): + if num_output < max_output_size: + if is_index_valid[i]: + num_output += 1 + selected_indices.append(i) + is_index_valid[i] = False + valid_indices = np.where(is_index_valid)[0] + if valid_indices.size == 0: + break + + intersect_over_union = iou_mask(np.expand_dims(masks[i], axis=0), masks[valid_indices]) + intersect_over_union = np.squeeze(intersect_over_union, axis=0) + is_index_valid[valid_indices] = np.logical_and( + is_index_valid[valid_indices], + intersect_over_union <= iou_threshold) + return gather_masklist(masklist, np.array(selected_indices)) + + +def multi_class_non_max_suppression_mask(masklist, score_thresh, iou_thresh, max_output_size): + """Multi-class version of non maximum suppression. + + This op greedily selects a subset of detection bounding boxes, pruning away boxes that have + high IOU (intersection over union) overlap (> thresh) with already selected boxes. It + operates independently for each class for which scores are provided (via the scores field + of the input box_list), pruning boxes with score less than a provided threshold prior to + applying NMS. + + Args: + masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing detection + scores. 
This scores field is a tensor that can be 1 dimensional (in the case of a + single class) or 2-dimensional, in which case we assume that it takes the shape + [num_boxes, num_classes]. We further assume that this rank is known statically and + that scores.shape[1] is also known (i.e., the number of classes is fixed and known + at graph construction time). + score_thresh: scalar threshold for score (low scoring boxes are removed). + iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap with previously + selected boxes are removed). + max_output_size: maximum number of retained boxes per class. + + Returns: + a masklist holding M boxes with a rank-1 scores field representing + corresponding scores for each box with scores sorted in decreasing order + and a rank-1 classes field representing a class label for each box. + Raises: + ValueError: if iou_thresh is not in [0, 1] or if input masklist does not have a valid scores field. + """ + if not 0 <= iou_thresh <= 1.0: + raise ValueError('thresh must be between 0 and 1') + if not isinstance(masklist, MaskList): + raise ValueError('masklist must be a masklist') + if not masklist.has_field('scores'): + raise ValueError('input masklist must have \'scores\' field') + scores = masklist.get_field('scores') + if len(scores.shape) == 1: + scores = np.reshape(scores, [-1, 1]) + elif len(scores.shape) == 2: + if scores.shape[1] is None: + raise ValueError('scores field must have statically defined second dimension') + else: + raise ValueError('scores field must be of rank 1 or 2') + + num_boxes = masklist.num_boxes() + num_scores = scores.shape[0] + num_classes = scores.shape[1] + + if num_boxes != num_scores: + raise ValueError('Incorrect scores field length: actual vs expected.') + + selected_boxes_list = [] + for class_idx in range(num_classes): + masklist_and_class_scores = MaskList(box_data=masklist.get(), mask_data=masklist.get_masks()) + class_scores = np.reshape(scores[0:num_scores, class_idx], [-1]) + masklist_and_class_scores.add_field('scores', class_scores) + masklist_filt = filter_scores_greater_than(masklist_and_class_scores, score_thresh) + nms_result = non_max_suppression( + masklist_filt, + max_output_size=max_output_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh) + nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx) + selected_boxes_list.append(nms_result) + selected_boxes = concatenate_boxlist(selected_boxes_list) + sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores') + return boxlist_to_masklist(boxlist=sorted_boxes) + + +def prune_non_overlapping_masklist(masklist1, masklist2, minoverlap=0.0): + """Prunes the boxes in list1 that overlap less than thresh with list2. + + For each mask in masklist1, we want its IOA to be more than minoverlap + with at least one of the masks in masklist2. If it does not, we remove + it. If the masks are not full size image, we do the pruning based on boxes. + + Args: + masklist1: BoxMaskList holding N boxes and masks. + masklist2: BoxMaskList holding M boxes and masks. + minoverlap: Minimum required overlap between boxes, to count them as overlapping. + + Returns: + A pruned masklist with size [N', 4]. 
+ """ + intersection_over_area = ioa_masklist(masklist2, masklist1) # [M, N] tensor + intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor + keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap)) + keep_inds = np.nonzero(keep_bool)[0] + new_masklist1 = gather_masklist(masklist1, keep_inds) + return new_masklist1 + + +def concatenate_masklist(masklists, fields=None): + """Concatenate list of masklists. + + This op concatenates a list of input masklists into a larger + masklist. It also + handles concatenation of masklist fields as long as the field tensor + shapes are equal except for the first dimension. + + Args: + masklists: list of BoxMaskList objects + fields: optional list of fields to also concatenate. By default, all + fields from the first BoxMaskList in the list are included in the concatenation. + + Returns: + a masklist with number of boxes equal to sum([masklist.num_boxes() for masklist in masklist]) + Raises: + ValueError: if masklists is invalid (i.e., is not a list, is empty, or contains non + masklist objects), or if requested fields are not contained in all masklists + """ + if fields is not None: + if 'masks' not in fields: + fields.append('masks') + return boxlist_to_masklist(concatenate_boxlist(boxlists=masklists, fields=fields)) + + +def filter_scores_greater_than_masklist(masklist, thresh): + """Filter to keep only boxes and masks with score exceeding a given threshold. + + This op keeps the collection of boxes and masks whose corresponding scores are + greater than the input threshold. + + Args: + masklist: BoxMaskList holding N boxes and masks. Must contain a + 'scores' field representing detection scores. + thresh: scalar threshold + + Returns: + a BoxMaskList holding M boxes and masks where M <= N + + Raises: + ValueError: if masklist not a BoxMaskList object or if it does not have a scores field + """ + if not isinstance(masklist, MaskList): + raise ValueError('masklist must be a BoxMaskList') + if not masklist.has_field('scores'): + raise ValueError('input masklist must have \'scores\' field') + scores = masklist.get_field('scores') + if len(scores.shape) > 2: + raise ValueError('Scores should have rank 1 or 2') + if len(scores.shape) == 2 and scores.shape[1] != 1: + raise ValueError('Scores should have rank 1 or have shape consistent with [None, 1]') + high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32) + return gather_masklist(masklist, high_score_indices) diff --git a/efficientdet/effdet/evaluation/object_detection_evaluation.py b/efficientdet/effdet/evaluation/object_detection_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ee9211196f0493e0837f72b35b3542a1d882ef45 --- /dev/null +++ b/efficientdet/effdet/evaluation/object_detection_evaluation.py @@ -0,0 +1,273 @@ +import logging + +import numpy as np + +from effdet.evaluation.metrics import compute_precision_recall, compute_average_precision, compute_cor_loc +from effdet.evaluation.per_image_evaluation import PerImageEvaluation + + +class ObjectDetectionEvaluation: + """Internal implementation of Pascal object detection metrics.""" + + def __init__(self, + num_gt_classes, + matching_iou_threshold=0.5, + nms_iou_threshold=1.0, + nms_max_output_boxes=10000, + recall_lower_bound=0.0, + recall_upper_bound=1.0, + use_weighted_mean_ap=False, + label_id_offset=0, + group_of_weight=0.0, + per_image_eval_class=PerImageEvaluation): + """Constructor. 
+ Args: + num_gt_classes: Number of ground-truth classes. + matching_iou_threshold: IOU threshold used for matching detected boxes to ground-truth boxes. + nms_iou_threshold: IOU threshold used for non-maximum suppression. + nms_max_output_boxes: Maximum number of boxes returned by non-maximum suppression. + recall_lower_bound: lower bound of recall operating area + recall_upper_bound: upper bound of recall operating area + use_weighted_mean_ap: (optional) boolean which determines if the mean + average precision is computed directly from the scores and tp_fp_labels of all classes. + label_id_offset: The label id offset. + group_of_weight: Weight of group-of boxes.If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives. + per_image_eval_class: The class that contains functions for computing per image metrics. + Raises: + ValueError: if num_gt_classes is smaller than 1. + """ + if num_gt_classes < 1: + raise ValueError('Need at least 1 groundtruth class for evaluation.') + + self.per_image_eval = per_image_eval_class( + num_gt_classes=num_gt_classes, + matching_iou_threshold=matching_iou_threshold, + nms_iou_threshold=nms_iou_threshold, + nms_max_output_boxes=nms_max_output_boxes, + group_of_weight=group_of_weight) + self.recall_lower_bound = recall_lower_bound + self.recall_upper_bound = recall_upper_bound + self.group_of_weight = group_of_weight + self.num_class = num_gt_classes + self.use_weighted_mean_ap = use_weighted_mean_ap + self.label_id_offset = label_id_offset + + self.gt_boxes = {} + self.gt_class_labels = {} + self.gt_masks = {} + self.gt_is_difficult_list = {} + self.gt_is_group_of_list = {} + self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float) + self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int) + + self._initialize_detections() + + def _initialize_detections(self): + """Initializes internal data structures.""" + self.detection_keys = set() + self.scores_per_class = [[] for _ in range(self.num_class)] + self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)] + self.num_images_correctly_detected_per_class = np.zeros(self.num_class) + self.average_precision_per_class = np.empty(self.num_class, dtype=float) + self.average_precision_per_class.fill(np.nan) + self.precisions_per_class = [np.nan] * self.num_class + self.recalls_per_class = [np.nan] * self.num_class + self.sum_tp_class = [np.nan] * self.num_class + + self.corloc_per_class = np.ones(self.num_class, dtype=float) + + def clear_detections(self): + self._initialize_detections() + + def add_single_ground_truth_image_info( + self, image_key, gt_boxes, gt_class_labels, + gt_is_difficult_list=None, gt_is_group_of_list=None, gt_masks=None): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_key: A unique string/integer identifier for the image. + gt_boxes: float32 numpy array of shape [num_boxes, 4] containing + `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + gt_class_labels: integer numpy array of shape [num_boxes] + containing 0-indexed groundtruth classes for the boxes. + gt_is_difficult_list: A length M numpy boolean array denoting + whether a ground truth box is a difficult instance or not. 
To support + the case that no boxes are difficult, it is by default set as None. + gt_is_group_of_list: A length M numpy boolean array denoting + whether a ground truth box is a group-of box or not. To support the case + that no boxes are groups-of, it is by default set as None. + gt_masks: uint8 numpy array of shape [num_boxes, height, width] + containing `num_boxes` groundtruth masks. The mask values range from 0 to 1. + """ + if image_key in self.gt_boxes: + logging.warning('image %s has already been added to the ground truth database.', image_key) + return + + self.gt_boxes[image_key] = gt_boxes + self.gt_class_labels[image_key] = gt_class_labels + self.gt_masks[image_key] = gt_masks + if gt_is_difficult_list is None: + num_boxes = gt_boxes.shape[0] + gt_is_difficult_list = np.zeros(num_boxes, dtype=bool) + gt_is_difficult_list = gt_is_difficult_list.astype(dtype=bool) + self.gt_is_difficult_list[image_key] = gt_is_difficult_list + if gt_is_group_of_list is None: + num_boxes = gt_boxes.shape[0] + gt_is_group_of_list = np.zeros(num_boxes, dtype=bool) + if gt_masks is None: + num_boxes = gt_boxes.shape[0] + mask_presence_indicator = np.zeros(num_boxes, dtype=bool) + else: + mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) == 0).astype(dtype=bool) + + gt_is_group_of_list = gt_is_group_of_list.astype(dtype=bool) + self.gt_is_group_of_list[image_key] = gt_is_group_of_list + + # ignore boxes without masks + masked_gt_is_difficult_list = gt_is_difficult_list | mask_presence_indicator + for class_index in range(self.num_class): + num_gt_instances = np.sum( + gt_class_labels[~masked_gt_is_difficult_list & ~gt_is_group_of_list] == class_index) + num_groupof_gt_instances = self.group_of_weight * np.sum( + gt_class_labels[gt_is_group_of_list & ~masked_gt_is_difficult_list] == class_index) + self.num_gt_instances_per_class[class_index] += num_gt_instances + num_groupof_gt_instances + if np.any(gt_class_labels == class_index): + self.num_gt_imgs_per_class[class_index] += 1 + + def add_single_detected_image_info( + self, image_key, detected_boxes, detected_scores, detected_class_labels, detected_masks=None): + """Adds detections for a single image to be used for evaluation. + Args: + image_key: A unique string/integer identifier for the image. + detected_boxes: float32 numpy array of shape [num_boxes, 4] containing + `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax] in + absolute image coordinates. + detected_scores: float32 numpy array of shape [num_boxes] containing + detection scores for the boxes. + detected_class_labels: integer numpy array of shape [num_boxes] containing + 0-indexed detection classes for the boxes. + detected_masks: np.uint8 numpy array of shape [num_boxes, height, width] + containing `num_boxes` detection masks with values ranging between 0 and 1. + Raises: + ValueError: if the number of boxes, scores and class labels differ in length. + """ + if len(detected_boxes) != len(detected_scores) or len(detected_boxes) != len(detected_class_labels): + raise ValueError( + 'detected_boxes, detected_scores and ' + 'detected_class_labels should all have same lengths. 
Got' + '[%d, %d, %d]' % len(detected_boxes), len(detected_scores), + len(detected_class_labels)) + + if image_key in self.detection_keys: + logging.warning('image %s has already been added to the detection result database', image_key) + return + + self.detection_keys.add(image_key) + if image_key in self.gt_boxes: + gt_boxes = self.gt_boxes[image_key] + gt_class_labels = self.gt_class_labels[image_key] + # Masks are popped instead of look up. The reason is that we do not want + # to keep all masks in memory which can cause memory overflow. + gt_masks = self.gt_masks.pop(image_key) + gt_is_difficult_list = self.gt_is_difficult_list[image_key] + gt_is_group_of_list = self.gt_is_group_of_list[image_key] + else: + gt_boxes = np.empty(shape=[0, 4], dtype=float) + gt_class_labels = np.array([], dtype=int) + if detected_masks is None: + gt_masks = None + else: + gt_masks = np.empty(shape=[0, 1, 1], dtype=float) + gt_is_difficult_list = np.array([], dtype=bool) + gt_is_group_of_list = np.array([], dtype=bool) + scores, tp_fp_labels, is_class_correctly_detected_in_image = \ + self.per_image_eval.compute_object_detection_metrics( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_class_labels=detected_class_labels, + gt_boxes=gt_boxes, + gt_class_labels=gt_class_labels, + gt_is_difficult_list=gt_is_difficult_list, + gt_is_group_of_list=gt_is_group_of_list, + detected_masks=detected_masks, + gt_masks=gt_masks) + + for i in range(self.num_class): + if scores[i].shape[0] > 0: + self.scores_per_class[i].append(scores[i]) + self.tp_fp_labels_per_class[i].append(tp_fp_labels[i]) + self.num_images_correctly_detected_per_class += is_class_correctly_detected_in_image + + def evaluate(self): + """Compute evaluation result. + Returns: + A dict with the following fields - + average_precision: float numpy array of average precision for each class. 
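A sketch of the intended calling convention for ObjectDetectionEvaluation: register groundtruth and detections per image key, then call evaluate(). The image key, boxes, and labels are invented, and the snippet assumes the rest of the per-image evaluation machinery shipped with this package; it is not part of the upstream file.

import numpy as np

evaluator = ObjectDetectionEvaluation(num_gt_classes=1, matching_iou_threshold=0.5)
evaluator.add_single_ground_truth_image_info(
    image_key='img0',
    gt_boxes=np.array([[10.0, 10.0, 50.0, 50.0]], dtype=np.float32),
    gt_class_labels=np.array([0], dtype=int))
evaluator.add_single_detected_image_info(
    image_key='img0',
    detected_boxes=np.array([[12.0, 12.0, 48.0, 48.0]], dtype=np.float32),
    detected_scores=np.array([0.9], dtype=np.float32),
    detected_class_labels=np.array([0], dtype=int))
metrics = evaluator.evaluate()
print(metrics['mean_ap'])   # expected 1.0: the lone detection matches its box at IoU > 0.5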
+ mean_ap: mean average precision of all classes, float scalar + precisions: List of precisions, each precision is a float numpy array + recalls: List of recalls, each recall is a float numpy array + corloc: numpy float array + mean_corloc: Mean CorLoc score for each class, float scalar + """ + if (self.num_gt_instances_per_class == 0).any(): + logging.warning( + 'The following classes have no ground truth examples: %s', + np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) + + if self.use_weighted_mean_ap: + all_scores = np.array([], dtype=float) + all_tp_fp_labels = np.array([], dtype=bool) + for class_index in range(self.num_class): + if self.num_gt_instances_per_class[class_index] == 0: + continue + if not self.scores_per_class[class_index]: + scores = np.array([], dtype=float) + tp_fp_labels = np.array([], dtype=float) + else: + scores = np.concatenate(self.scores_per_class[class_index]) + tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) + if self.use_weighted_mean_ap: + all_scores = np.append(all_scores, scores) + all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) + precision, recall = compute_precision_recall( + scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) + recall_within_bound_indices = [ + index for index, value in enumerate(recall) if + value >= self.recall_lower_bound and value <= self.recall_upper_bound + ] + recall_within_bound = recall[recall_within_bound_indices] + precision_within_bound = precision[recall_within_bound_indices] + + self.precisions_per_class[class_index] = precision_within_bound + self.recalls_per_class[class_index] = recall_within_bound + self.sum_tp_class[class_index] = tp_fp_labels.sum() + average_precision = compute_average_precision(precision_within_bound, recall_within_bound) + self.average_precision_per_class[class_index] = average_precision + logging.debug('average_precision: %f', average_precision) + + self.corloc_per_class = compute_cor_loc( + self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) + + if self.use_weighted_mean_ap: + num_gt_instances = np.sum(self.num_gt_instances_per_class) + precision, recall = compute_precision_recall(all_scores, all_tp_fp_labels, num_gt_instances) + recall_within_bound_indices = [ + index for index, value in enumerate(recall) if + value >= self.recall_lower_bound and value <= self.recall_upper_bound + ] + recall_within_bound = recall[recall_within_bound_indices] + precision_within_bound = precision[recall_within_bound_indices] + mean_ap = compute_average_precision(precision_within_bound, recall_within_bound) + else: + mean_ap = np.nanmean(self.average_precision_per_class) + mean_corloc = np.nanmean(self.corloc_per_class) + + return dict( + per_class_ap=self.average_precision_per_class, mean_ap=mean_ap, + per_class_precision=self.precisions_per_class, + per_class_recall=self.recalls_per_class, + per_class_corlocs=self.corloc_per_class, mean_corloc=mean_corloc) diff --git a/efficientdet/effdet/evaluation/per_image_evaluation.py b/efficientdet/effdet/evaluation/per_image_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..e904027c5214b82d5729c5132914daff244c4c98 --- /dev/null +++ b/efficientdet/effdet/evaluation/per_image_evaluation.py @@ -0,0 +1,538 @@ +from .np_mask_list import * +from .metrics import * + + +class PerImageEvaluation: + """Evaluate detection result of a single image.""" + + def __init__(self, + num_gt_classes, + matching_iou_threshold=0.5, + 
nms_iou_threshold=0.3, + nms_max_output_boxes=50, + group_of_weight=0.0): + """Initialized PerImageEvaluation by evaluation parameters. + Args: + num_gt_classes: Number of ground truth object classes + matching_iou_threshold: A ratio of area intersection to union, which is + the threshold to consider whether a detection is true positive or not + nms_iou_threshold: IOU threshold used in Non Maximum Suppression. + nms_max_output_boxes: Number of maximum output boxes in NMS. + group_of_weight: Weight of the group-of boxes. + """ + self.matching_iou_threshold = matching_iou_threshold + self.nms_iou_threshold = nms_iou_threshold + self.nms_max_output_boxes = nms_max_output_boxes + self.num_gt_classes = num_gt_classes + self.group_of_weight = group_of_weight + + def compute_object_detection_metrics( + self, detected_boxes, detected_scores, detected_class_labels, + gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list, + detected_masks=None, gt_masks=None): + """Evaluates detections as being tp, fp or weighted from a single image. + The evaluation is done in two stages: + 1. All detections are matched to non group-of boxes; true positives are + determined and detections matched to difficult boxes are ignored. + 2. Detections that are determined as false positives are matched against + group-of boxes and weighted if matched. + Args: + detected_boxes: A float numpy array of shape [N, 4], representing N + regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max] + detected_scores: A float numpy array of shape [N, 1], representing the + confidence scores of the detected N object instances. + detected_class_labels: A integer numpy array of shape [N, 1], repreneting + the class labels of the detected N object instances. + gt_boxes: A float numpy array of shape [M, 4], representing M + regions of object instances in ground truth + gt_class_labels: An integer numpy array of shape [M, 1], + representing M class labels of object instances in ground truth + gt_is_difficult_list: A boolean numpy array of length M denoting + whether a ground truth box is a difficult instance or not + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag + detected_masks: (optional) A uint8 numpy array of shape [N, height, + width]. If not None, the metrics will be computed based on masks. + gt_masks: (optional) A uint8 numpy array of shape [M, height, + width]. Can have empty masks, i.e. where all values are 0. + Returns: + scores: A list of C float numpy arrays. Each numpy array is of + shape [K, 1], representing K scores detected with object class label c + tp_fp_labels: A list of C boolean numpy arrays. 
Each numpy array + is of shape [K, 1], representing K True/False positive label of + object instances detected with class label c + is_class_correctly_detected_in_image: a numpy integer array of + shape [C, 1], indicating whether the correponding class has a least + one instance being correctly detected in the image + """ + detected_boxes, detected_scores, detected_class_labels, detected_masks = ( + self._remove_invalid_boxes(detected_boxes, detected_scores, detected_class_labels, detected_masks)) + + scores, tp_fp_labels = self._compute_tp_fp( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_class_labels=detected_class_labels, + gt_boxes=gt_boxes, + gt_class_labels=gt_class_labels, + gt_is_difficult_list=gt_is_difficult_list, + gt_is_group_of_list=gt_is_group_of_list, + detected_masks=detected_masks, + gt_masks=gt_masks) + + is_class_correctly_detected_in_image = self._compute_cor_loc( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_class_labels=detected_class_labels, + gt_boxes=gt_boxes, + gt_class_labels=gt_class_labels, + detected_masks=detected_masks, + gt_masks=gt_masks) + + return scores, tp_fp_labels, is_class_correctly_detected_in_image + + def _compute_cor_loc( + self, detected_boxes, detected_scores, detected_class_labels, + gt_boxes, gt_class_labels, detected_masks=None, gt_masks=None): + """Compute CorLoc score for object detection result. + Args: + detected_boxes: A float numpy array of shape [N, 4], representing N + regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max] + detected_scores: A float numpy array of shape [N, 1], representing the + confidence scores of the detected N object instances. + detected_class_labels: A integer numpy array of shape [N, 1], repreneting + the class labels of the detected N object instances. + gt_boxes: A float numpy array of shape [M, 4], representing M + regions of object instances in ground truth + gt_class_labels: An integer numpy array of shape [M, 1], + representing M class labels of object instances in ground truth + detected_masks: (optional) A uint8 numpy array of shape [N, height, width]. + If not None, the scores will be computed based on masks. + gt_masks: (optional) A uint8 numpy array of shape [M, height, width]. + Returns: + is_class_correctly_detected_in_image: a numpy integer array of + shape [C, 1], indicating whether the correponding class has a least + one instance being correctly detected in the image + Raises: + ValueError: If detected masks is not None but groundtruth masks are None, + or the other way around. 
+ """ + if (detected_masks is not None and gt_masks is None) or ( + detected_masks is None and gt_masks is not None): + raise ValueError( + 'If `detected_masks` is provided, then `gt_masks` should also be provided.') + + is_class_correctly_detected_in_image = np.zeros( + self.num_gt_classes, dtype=int) + for i in range(self.num_gt_classes): + (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) = self._get_ith_class_arrays( + detected_boxes, detected_scores, detected_masks, + detected_class_labels, gt_boxes, gt_masks, + gt_class_labels, i) + is_class_correctly_detected_in_image[i] = ( + self._compute_is_class_correctly_detected_in_image( + detected_boxes=detected_boxes_at_ith_class, + detected_scores=detected_scores_at_ith_class, + gt_boxes=gt_boxes_at_ith_class, + detected_masks=detected_masks_at_ith_class, + gt_masks=gt_masks_at_ith_class)) + + return is_class_correctly_detected_in_image + + def _compute_is_class_correctly_detected_in_image( + self, detected_boxes, detected_scores, gt_boxes, detected_masks=None, gt_masks=None): + """Compute CorLoc score for a single class. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + detected_masks: (optional) A np.uint8 numpy array of shape [N, height, width]. + If not None, the scores will be computed based on masks. + gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width]. + Returns: + is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a + class is correctly detected in the image or not + """ + if detected_boxes.size > 0: + if gt_boxes.size > 0: + max_score_id = np.argmax(detected_scores) + mask_mode = False + if detected_masks is not None and gt_masks is not None: + mask_mode = True + if mask_mode: + detected_boxlist = MaskList( + box_data=np.expand_dims(detected_boxes[max_score_id], axis=0), + mask_data=np.expand_dims(detected_masks[max_score_id], axis=0)) + gt_boxlist = MaskList(box_data=gt_boxes, mask_data=gt_masks) + iou = iou_masklist(detected_boxlist, gt_boxlist) + else: + detected_boxlist = BoxList(np.expand_dims(detected_boxes[max_score_id, :], axis=0)) + gt_boxlist = BoxList(gt_boxes) + iou = iou_boxlist(detected_boxlist, gt_boxlist) + if np.max(iou) >= self.matching_iou_threshold: + return 1 + return 0 + + def _compute_tp_fp( + self, detected_boxes, detected_scores, detected_class_labels, + gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None): + """Labels true/false positives of detections of an image across all classes. + Args: + detected_boxes: A float numpy array of shape [N, 4], representing N + regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max] + detected_scores: A float numpy array of shape [N, 1], representing the + confidence scores of the detected N object instances. + detected_class_labels: A integer numpy array of shape [N, 1], representing + the class labels of the detected N object instances. 
+ gt_boxes: A float numpy array of shape [M, 4], representing M + regions of object instances in ground truth + gt_class_labels: An integer numpy array of shape [M, 1], + representing M class labels of object instances in ground truth + gt_is_difficult_list: A boolean numpy array of length M denoting + whether a ground truth box is a difficult instance or not + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag + detected_masks: (optional) A np.uint8 numpy array of shape [N, height, + width]. If not None, the scores will be computed based on masks. + gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width]. + Returns: + result_scores: A list of float numpy arrays. Each numpy array is of + shape [K, 1], representing K scores detected with object class label c + result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of + shape [K, 1], representing K True/False positive label of object + instances detected with class label c + Raises: + ValueError: If detected masks is not None but groundtruth masks are None, + or the other way around. + """ + if detected_masks is not None and gt_masks is None: + raise ValueError( + 'Detected masks is available but groundtruth masks is not.') + if detected_masks is None and gt_masks is not None: + raise ValueError( + 'Groundtruth masks is available but detected masks is not.') + + result_scores = [] + result_tp_fp_labels = [] + for i in range(self.num_gt_classes): + gt_is_difficult_list_at_ith_class = ( + gt_is_difficult_list[gt_class_labels == i]) + gt_is_group_of_list_at_ith_class = ( + gt_is_group_of_list[gt_class_labels == i]) + (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) = self._get_ith_class_arrays( + detected_boxes, detected_scores, detected_masks, + detected_class_labels, gt_boxes, gt_masks, + gt_class_labels, i) + scores, tp_fp_labels = self._compute_tp_fp_for_single_class( + detected_boxes=detected_boxes_at_ith_class, + detected_scores=detected_scores_at_ith_class, + gt_boxes=gt_boxes_at_ith_class, + gt_is_difficult_list=gt_is_difficult_list_at_ith_class, + gt_is_group_of_list=gt_is_group_of_list_at_ith_class, + detected_masks=detected_masks_at_ith_class, + gt_masks=gt_masks_at_ith_class) + result_scores.append(scores) + result_tp_fp_labels.append(tp_fp_labels) + return result_scores, result_tp_fp_labels + + def _get_overlaps_and_scores_mask_mode( + self, detected_boxes, detected_scores, detected_masks, + gt_boxes, gt_masks, gt_is_group_of_list): + """Computes overlaps and scores between detected and groudntruth masks. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + detected_masks: A uint8 numpy array of shape [N, height, width]. If not + None, the scores will be computed based on masks. + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_masks: A uint8 numpy array of shape [M, height, width]. + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. + Returns: + iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_non_group_of_boxlist.num_boxes() == 0 it will be None. + ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. 
If + gt_group_of_boxlist.num_boxes() == 0 it will be None. + scores: The score of the detected boxlist. + num_boxes: Number of non-maximum suppressed detected boxes. + """ + detected_boxlist = MaskList(box_data=detected_boxes, mask_data=detected_masks) + detected_boxlist.add_field('scores', detected_scores) + detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold) + gt_non_group_of_boxlist = MaskList( + box_data=gt_boxes[~gt_is_group_of_list], mask_data=gt_masks[~gt_is_group_of_list]) + gt_group_of_boxlist = MaskList( + box_data=gt_boxes[gt_is_group_of_list], mask_data=gt_masks[gt_is_group_of_list]) + iou_b = iou_masklist(detected_boxlist, gt_non_group_of_boxlist) + ioa_b = np.transpose(ioa_masklist(gt_group_of_boxlist, detected_boxlist)) + scores = detected_boxlist.get_field('scores') + num_boxes = detected_boxlist.num_boxes() + return iou_b, ioa_b, scores, num_boxes + + def _get_overlaps_and_scores_box_mode( + self, detected_boxes, detected_scores, gt_boxes, gt_is_group_of_list): + """Computes overlaps and scores between detected and groudntruth boxes. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. + Returns: + iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_non_group_of_boxlist.num_boxes() == 0 it will be None. + ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_group_of_boxlist.num_boxes() == 0 it will be None. + scores: The score of the detected boxlist. + num_boxes: Number of non-maximum suppressed detected boxes. + """ + detected_boxlist = BoxList(detected_boxes) + detected_boxlist.add_field('scores', detected_scores) + detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold) + gt_non_group_of_boxlist = BoxList(gt_boxes[~gt_is_group_of_list]) + gt_group_of_boxlist = BoxList(gt_boxes[gt_is_group_of_list]) + iou_b = iou_boxlist(detected_boxlist, gt_non_group_of_boxlist) + ioa_b = np.transpose(ioa_boxlist(gt_group_of_boxlist, detected_boxlist)) + scores = detected_boxlist.get_field('scores') + num_boxes = detected_boxlist.num_boxes() + return iou_b, ioa_b, scores, num_boxes + + def _compute_tp_fp_for_single_class( + self, detected_boxes, detected_scores, gt_boxes, + gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None): + """Labels boxes detected with the same class from the same image as tp/fp. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_is_difficult_list: A boolean numpy array of length M denoting + whether a ground truth box is a difficult instance or not. If a + groundtruth box is difficult, every detection matching this box is ignored. + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. 
+ detected_masks: (optional) A uint8 numpy array of shape [N, height, + width]. If not None, the scores will be computed based on masks. + gt_masks: (optional) A uint8 numpy array of shape [M, height, width]. + Returns: + Two arrays of the same size, containing all boxes that were evaluated as + being true positives or false positives; if a box matched to a difficult + box or to a group-of box, it is ignored. + scores: A numpy array representing the detection scores. + tp_fp_labels: a boolean numpy array indicating whether a detection is a true positive. + """ + if detected_boxes.size == 0: + return np.array([], dtype=float), np.array([], dtype=bool) + + mask_mode = False + if detected_masks is not None and gt_masks is not None: + mask_mode = True + + iou_b = np.ndarray([0, 0]) + ioa_b = np.ndarray([0, 0]) + iou_m = np.ndarray([0, 0]) + ioa_m = np.ndarray([0, 0]) + if mask_mode: + # For Instance Segmentation Evaluation on Open Images V5, not all boxed + # instances have corresponding segmentation annotations. Those boxes that + # dont have segmentation annotations are represented as empty masks in + # gt_masks nd array. + mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) > 0) + + iou_m, ioa_m, scores, num_detected_boxes = self._get_overlaps_and_scores_mask_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_masks=detected_masks, + gt_boxes=gt_boxes[mask_presence_indicator, :], + gt_masks=gt_masks[mask_presence_indicator, :], + gt_is_group_of_list=gt_is_group_of_list[mask_presence_indicator]) + + if sum(mask_presence_indicator) < len(mask_presence_indicator): + # Not all masks are present - some masks are empty + iou_b, ioa_b, _, num_detected_boxes = self._get_overlaps_and_scores_box_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + gt_boxes=gt_boxes[~mask_presence_indicator, :], + gt_is_group_of_list=gt_is_group_of_list[~mask_presence_indicator]) + num_detected_boxes = detected_boxes.shape[0] + else: + mask_presence_indicator = np.zeros(gt_is_group_of_list.shape, dtype=bool) + iou_b, ioa_b, scores, num_detected_boxes = self._get_overlaps_and_scores_box_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + gt_boxes=gt_boxes, + gt_is_group_of_list=gt_is_group_of_list) + + if gt_boxes.size == 0: + return scores, np.zeros(num_detected_boxes, dtype=bool) + + tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_box = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_difficult = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_group_of = np.zeros(num_detected_boxes, dtype=bool) + + def compute_match_iou(iou_matrix, gt_nongroup_of_is_difficult_list, is_box): + """Computes TP/FP for non group-of box matching. + The function updates the following local variables: + tp_fp_labels - if a box is matched to group-of + is_matched_to_difficult - the detections that were processed at this are + matched to difficult box. + is_matched_to_box - the detections that were processed at this stage are marked as is_box. + Args: + iou_matrix: intersection-over-union matrix [num_gt_boxes]x[num_det_boxes]. + gt_nongroup_of_is_difficult_list: boolean that specifies if gt box is difficult. + is_box: boolean that specifies if currently boxes or masks are processed. 
+ """ + max_overlap_gt_ids = np.argmax(iou_matrix, axis=1) + is_gt_detected = np.zeros(iou_matrix.shape[1], dtype=bool) + for i in range(num_detected_boxes): + gt_id = max_overlap_gt_ids[i] + is_evaluatable = ( + not tp_fp_labels[i] and + not is_matched_to_difficult[i] and + iou_matrix[i, gt_id] >= self.matching_iou_threshold and + not is_matched_to_group_of[i]) + if is_evaluatable: + if not gt_nongroup_of_is_difficult_list[gt_id]: + if not is_gt_detected[gt_id]: + tp_fp_labels[i] = True + is_gt_detected[gt_id] = True + is_matched_to_box[i] = is_box + else: + is_matched_to_difficult[i] = True + + def compute_match_ioa(ioa_matrix, is_box): + """Computes TP/FP for group-of box matching. + The function updates the following local variables: + is_matched_to_group_of - if a box is matched to group-of + is_matched_to_box - the detections that were processed at this stage are marked as is_box. + Args: + ioa_matrix: intersection-over-area matrix [num_gt_boxes]x[num_det_boxes]. + is_box: boolean that specifies if currently boxes or masks are processed. + Returns: + scores_group_of: of detections matched to group-of boxes[num_groupof_matched]. + tp_fp_labels_group_of: boolean array of size [num_groupof_matched], all values are True. + """ + scores_group_of = np.zeros(ioa_matrix.shape[1], dtype=float) + tp_fp_labels_group_of = self.group_of_weight * np.ones(ioa_matrix.shape[1], dtype=float) + max_overlap_group_of_gt_ids = np.argmax(ioa_matrix, axis=1) + for i in range(num_detected_boxes): + gt_id = max_overlap_group_of_gt_ids[i] + is_evaluatable = ( + not tp_fp_labels[i] and + not is_matched_to_difficult[i] and + ioa_matrix[i, gt_id] >= self.matching_iou_threshold and + not is_matched_to_group_of[i]) + if is_evaluatable: + is_matched_to_group_of[i] = True + is_matched_to_box[i] = is_box + scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i]) + selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0)) + scores_group_of = scores_group_of[selector] + tp_fp_labels_group_of = tp_fp_labels_group_of[selector] + + return scores_group_of, tp_fp_labels_group_of + + # The evaluation is done in two stages: + # 1. Evaluate all objects that actually have instance level masks. + # 2. Evaluate all objects that are not already evaluated as boxes. + if iou_m.shape[1] > 0: + gt_is_difficult_mask_list = gt_is_difficult_list[mask_presence_indicator] + gt_is_group_of_mask_list = gt_is_group_of_list[mask_presence_indicator] + compute_match_iou(iou_m, gt_is_difficult_mask_list[~gt_is_group_of_mask_list], is_box=False) + + scores_mask_group_of = np.ndarray([0], dtype=float) + tp_fp_labels_mask_group_of = np.ndarray([0], dtype=float) + if ioa_m.shape[1] > 0: + scores_mask_group_of, tp_fp_labels_mask_group_of = compute_match_ioa(ioa_m, is_box=False) + + # Tp-fp evaluation for non-group of boxes (if any). + if iou_b.shape[1] > 0: + gt_is_difficult_box_list = gt_is_difficult_list[~mask_presence_indicator] + gt_is_group_of_box_list = gt_is_group_of_list[~mask_presence_indicator] + compute_match_iou(iou_b, gt_is_difficult_box_list[~gt_is_group_of_box_list], is_box=True) + + scores_box_group_of = np.ndarray([0], dtype=float) + tp_fp_labels_box_group_of = np.ndarray([0], dtype=float) + if ioa_b.shape[1] > 0: + scores_box_group_of, tp_fp_labels_box_group_of = compute_match_ioa(ioa_b, is_box=True) + + if mask_mode: + # Note: here crowds are treated as ignore regions. 
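+            # keep only detections not matched to difficult boxes, group-of regions, or
+            # box-only ground truth (instances without mask annotations); group-of matches
+            # are appended separately with group_of_weight as their soft tp/fp label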
+ valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of & ~is_matched_to_box) + return np.concatenate((scores[valid_entries], scores_mask_group_of)),\ + np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_mask_group_of)) + else: + valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of) + return np.concatenate((scores[valid_entries], scores_box_group_of)),\ + np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_box_group_of)) + + def _get_ith_class_arrays( + self, detected_boxes, detected_scores, detected_masks, detected_class_labels, + gt_boxes, gt_masks, gt_class_labels, class_index): + """Returns numpy arrays belonging to class with index `class_index`. + Args: + detected_boxes: A numpy array containing detected boxes. + detected_scores: A numpy array containing detected scores. + detected_masks: A numpy array containing detected masks. + detected_class_labels: A numpy array containing detected class labels. + gt_boxes: A numpy array containing groundtruth boxes. + gt_masks: A numpy array containing groundtruth masks. + gt_class_labels: A numpy array containing groundtruth class labels. + class_index: An integer index. + Returns: + gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled as ith class. + gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled as ith class. + detected_boxes_at_ith_class: A numpy array containing detected boxes corresponding to the ith class. + detected_scores_at_ith_class: A numpy array containing detected scores corresponding to the ith class. + detected_masks_at_ith_class: A numpy array containing detected masks corresponding to the ith class. + """ + selected_groundtruth = (gt_class_labels == class_index) + gt_boxes_at_ith_class = gt_boxes[selected_groundtruth] + if gt_masks is not None: + gt_masks_at_ith_class = gt_masks[selected_groundtruth] + else: + gt_masks_at_ith_class = None + selected_detections = (detected_class_labels == class_index) + detected_boxes_at_ith_class = detected_boxes[selected_detections] + detected_scores_at_ith_class = detected_scores[selected_detections] + if detected_masks is not None: + detected_masks_at_ith_class = detected_masks[selected_detections] + else: + detected_masks_at_ith_class = None + return (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) + + def _remove_invalid_boxes( + self, detected_boxes, detected_scores, detected_class_labels, detected_masks=None): + """Removes entries with invalid boxes. + A box is invalid if either its xmax is smaller than its xmin, or its ymax is smaller than its ymin. + Args: + detected_boxes: A float numpy array of size [num_boxes, 4] containing box + coordinates in [ymin, xmin, ymax, xmax] format. + detected_scores: A float numpy array of size [num_boxes]. + detected_class_labels: A int32 numpy array of size [num_boxes]. + detected_masks: A uint8 numpy array of size [num_boxes, height, width]. + Returns: + valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4] + containing box coordinates in [ymin, xmin, ymax, xmax] format. + valid_detected_scores: A float numpy array of size [num_valid_boxes]. + valid_detected_class_labels: A int32 numpy array of size [num_valid_boxes]. + valid_detected_masks: A uint8 numpy array of size [num_valid_boxes, height, width]. 
+ """ + valid_indices = np.logical_and( + detected_boxes[:, 0] < detected_boxes[:, 2], detected_boxes[:, 1] < detected_boxes[:, 3]) + detected_boxes = detected_boxes[valid_indices] + detected_scores = detected_scores[valid_indices] + detected_class_labels = detected_class_labels[valid_indices] + if detected_masks is not None: + detected_masks = detected_masks[valid_indices] + return [detected_boxes, detected_scores, detected_class_labels, detected_masks] + + diff --git a/efficientdet/effdet/evaluator.py b/efficientdet/effdet/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..f32e673ea33e11a9bb45cc6d40c53667d4485408 --- /dev/null +++ b/efficientdet/effdet/evaluator.py @@ -0,0 +1,195 @@ +import torch +import torch.distributed as dist +import abc +import json +import logging +import time +import numpy as np + +from .distributed import synchronize, is_main_process, all_gather_container +from pycocotools.cocoeval import COCOeval + +# FIXME experimenting with speedups for OpenImages eval, it's slow +#import pyximport; py_importer, pyx_importer = pyximport.install(pyimport=True) +import effdet.evaluation.detection_evaluator as tfm_eval +#pyximport.uninstall(py_importer, pyx_importer) + +_logger = logging.getLogger(__name__) + + +__all__ = ['CocoEvaluator', 'PascalEvaluator', 'OpenImagesEvaluator', 'create_evaluator'] + + +class Evaluator: + + def __init__(self, distributed=False, pred_yxyx=False): + self.distributed = distributed + self.distributed_device = None + self.pred_yxyx = pred_yxyx + self.img_indices = [] + self.predictions = [] + + def add_predictions(self, detections, target): + if self.distributed: + if self.distributed_device is None: + # cache for use later to broadcast end metric + self.distributed_device = detections.device + synchronize() + detections = all_gather_container(detections) + img_indices = all_gather_container(target['img_idx']) + if not is_main_process(): + return + else: + img_indices = target['img_idx'] + + detections = detections.cpu().numpy() + img_indices = img_indices.cpu().numpy() + for img_idx, img_dets in zip(img_indices, detections): + self.img_indices.append(img_idx) + self.predictions.append(img_dets) + + def _coco_predictions(self): + # generate coco-style predictions + coco_predictions = [] + coco_ids = [] + for img_idx, img_dets in zip(self.img_indices, self.predictions): + img_id = self._dataset.img_ids[img_idx] + coco_ids.append(img_id) + if self.pred_yxyx: + # to xyxy + img_dets[:, 0:4] = img_dets[:, [1, 0, 3, 2]] + # to xywh + img_dets[:, 2] -= img_dets[:, 0] + img_dets[:, 3] -= img_dets[:, 1] + for det in img_dets: + score = float(det[4]) + if score < .001: # stop when below this threshold, scores in descending order + break + coco_det = dict( + image_id=int(img_id), + bbox=det[0:4].tolist(), + score=score, + category_id=int(det[5])) + coco_predictions.append(coco_det) + return coco_predictions, coco_ids + + @abc.abstractmethod + def evaluate(self): + pass + + def save(self, result_file): + # save results in coco style, override to save in a alternate form + if not self.distributed or dist.get_rank() == 0: + assert len(self.predictions) + coco_predictions, coco_ids = self._coco_predictions() + json.dump(coco_predictions, open(result_file, 'w'), indent=4) + + +class CocoEvaluator(Evaluator): + + def __init__(self, dataset, neptune=None, distributed=False, pred_yxyx=False): + super().__init__(distributed=distributed, pred_yxyx=pred_yxyx) + self._dataset = dataset.parser + self.coco_api = dataset.parser.coco + 
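+        # optional Neptune run handle; when provided, evaluate() also logs the COCO mAP
+        # metrics (0.5-0.95 IoU and 0.5 IoU) to the experiment tracker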
self.neptune = neptune + + def reset(self): + self.img_indices = [] + self.predictions = [] + + def evaluate(self): + if not self.distributed or dist.get_rank() == 0: + assert len(self.predictions) + coco_predictions, coco_ids = self._coco_predictions() + json.dump(coco_predictions, open('./temp.json', 'w'), indent=4) + results = self.coco_api.loadRes('./temp.json') + coco_eval = COCOeval(self.coco_api, results, 'bbox') + coco_eval.params.imgIds = coco_ids # score only ids we've used + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + metric = coco_eval.stats[0] # mAP 0.5-0.95 + if self.neptune: + self.neptune.log_metric('valid/mAP/0.5-0.95IOU', metric) + self.neptune.log_metric('valid/mAP/0.5IOU', coco_eval.stats[1]) + if self.distributed: + dist.broadcast(torch.tensor(metric, device=self.distributed_device), 0) + else: + metric = torch.tensor(0, device=self.distributed_device) + dist.broadcast(metric, 0) + metric = metric.item() + self.reset() + return metric + + +class TfmEvaluator(Evaluator): + """ Tensorflow Models Evaluator Wrapper """ + def __init__( + self, dataset, neptune=None, distributed=False, pred_yxyx=False, + evaluator_cls=tfm_eval.ObjectDetectionEvaluator): + super().__init__(distributed=distributed, pred_yxyx=pred_yxyx) + self._evaluator = evaluator_cls(categories=dataset.parser.cat_dicts) + self._eval_metric_name = self._evaluator._metric_names[0] + self._dataset = dataset.parser + self.neptune = neptune + + def reset(self): + self._evaluator.clear() + self.img_indices = [] + self.predictions = [] + + def evaluate(self): + if not self.distributed or dist.get_rank() == 0: + for img_idx, img_dets in zip(self.img_indices, self.predictions): + gt = self._dataset.get_ann_info(img_idx) + self._evaluator.add_single_ground_truth_image_info(img_idx, gt) + + bbox = img_dets[:, 0:4] if self.pred_yxyx else img_dets[:, [1, 0, 3, 2]] + det = dict(bbox=bbox, score=img_dets[:, 4], cls=img_dets[:, 5]) + self._evaluator.add_single_detected_image_info(img_idx, det) + + metrics = self._evaluator.evaluate() + _logger.info('Metrics:') + for k, v in metrics.items(): + _logger.info(f'{k}: {v}') + if self.neptune: + key = 'valid/mAP/' + str(k).split('/')[-1] + self.neptune.log_metric(key, v) + + map_metric = metrics[self._eval_metric_name] + if self.distributed: + dist.broadcast(torch.tensor(map_metric, device=self.distributed_device), 0) + else: + map_metric = torch.tensor(0, device=self.distributed_device) + wait = dist.broadcast(map_metric, 0, async_op=True) + while not wait.is_completed(): + # wait without spinning the cpu @ 100%, no need for low latency here + time.sleep(0.5) + map_metric = map_metric.item() + self.reset() + return map_metric + + +class PascalEvaluator(TfmEvaluator): + + def __init__(self, dataset, neptune=None, distributed=False, pred_yxyx=False): + super().__init__( + dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.PascalDetectionEvaluator) + + +class OpenImagesEvaluator(TfmEvaluator): + + def __init__(self, dataset, distributed=False, pred_yxyx=False): + super().__init__( + dataset, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.OpenImagesDetectionEvaluator) + + +def create_evaluator(name, dataset, neptune=None, distributed=False, pred_yxyx=False): + # FIXME support OpenImages Challenge2019 metric w/ image level label consideration + if 'coco' in name: + return CocoEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx) + elif 'openimages' in name: + return 
OpenImagesEvaluator(dataset, distributed=distributed, pred_yxyx=pred_yxyx) + else: + return CocoEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx) + #return PascalEvaluator(dataset, neptune, distributed=distributed, pred_yxyx=pred_yxyx) diff --git a/efficientdet/effdet/factory.py b/efficientdet/effdet/factory.py new file mode 100644 index 0000000000000000000000000000000000000000..dbf8abf4f229cfe3cf503805183ff2a8bf0e2973 --- /dev/null +++ b/efficientdet/effdet/factory.py @@ -0,0 +1,54 @@ +from .efficientdet import EfficientDet, HeadNet +from .bench import DetBenchTrain, DetBenchPredict +from .config import get_efficientdet_config +from .helpers import load_pretrained, load_checkpoint + + +def create_model( + model_name, bench_task='', num_classes=None, pretrained=False, + checkpoint_path='', checkpoint_ema=False, **kwargs): + + config = get_efficientdet_config(model_name) + return create_model_from_config( + config, bench_task=bench_task, num_classes=num_classes, pretrained=pretrained, + checkpoint_path=checkpoint_path, checkpoint_ema=checkpoint_ema, **kwargs) + + +def create_model_from_config( + config, bench_task='', num_classes=None, pretrained=False, + checkpoint_path='', checkpoint_ema=False, **kwargs): + + pretrained_backbone = kwargs.pop('pretrained_backbone', True) + if pretrained or checkpoint_path: + pretrained_backbone = False # no point in loading backbone weights + + # Config overrides, override some config values via kwargs. + overrides = ('redundant_bias', 'label_smoothing', 'new_focal', 'jit_loss') + for ov in overrides: + value = kwargs.pop(ov, None) + if value is not None: + setattr(config, ov, value) + + labeler = kwargs.pop('bench_labeler', False) + + # create the base model + model = EfficientDet(config, pretrained_backbone=pretrained_backbone, **kwargs) + + # pretrained weights are always spec'd for original config, load them before we change the model + if pretrained: + load_pretrained(model, config.url) + + # reset model head if num_classes doesn't match configs + if num_classes is not None and num_classes != config.num_classes: + model.reset_head(num_classes=num_classes) + + # load an argument specified training checkpoint + if checkpoint_path: + load_checkpoint(model, checkpoint_path, use_ema=checkpoint_ema) + + # wrap model in task specific training/prediction bench if set + if bench_task == 'train': + model = DetBenchTrain(model, create_labeler=labeler) + elif bench_task == 'predict': + model = DetBenchPredict(model) + return model diff --git a/efficientdet/effdet/helpers.py b/efficientdet/effdet/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..597e65df2eb60424ff783156fad90e78be7cb9b9 --- /dev/null +++ b/efficientdet/effdet/helpers.py @@ -0,0 +1,22 @@ +import torch +import os +import logging +from collections import OrderedDict + +from timm.models import load_checkpoint + +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url + + +def load_pretrained(model, url, filter_fn=None, strict=True): + if not url: + logging.warning("Pretrained model URL is empty, using random initialization. 
" + "Did you intend to use a `tf_` variant of the model?") + return + state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') + if filter_fn is not None: + state_dict = filter_fn(state_dict) + model.load_state_dict(state_dict, strict=strict) diff --git a/efficientdet/effdet/loss.py b/efficientdet/effdet/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..44ddca31f03b27ba86d6a1c955cc85cd722e08ae --- /dev/null +++ b/efficientdet/effdet/loss.py @@ -0,0 +1,259 @@ +""" EfficientDet Focal, Huber/Smooth L1 loss fns w/ jit support + +Based on loss fn in Google's automl EfficientDet repository (Apache 2.0 license). +https://github.com/google/automl/tree/master/efficientdet + +Copyright 2020 Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from typing import Optional, List, Tuple + + +def focal_loss_legacy(logits, targets, alpha: float, gamma: float, normalizer): + """Compute the focal loss between `logits` and the golden `target` values. + + 'Legacy focal loss matches the loss used in the official Tensorflow impl for initial + model releases and some time after that. It eventually transitioned to the 'New' loss + defined below. + + Focal loss = -(1-pt)^gamma * log(pt) + where pt is the probability of being classified to the true class. + + Args: + logits: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + + targets: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + + alpha: A float32 scalar multiplying alpha to the loss from positive examples + and (1-alpha) to the loss from negative examples. + + gamma: A float32 scalar modulating loss from hard and easy examples. + + normalizer: A float32 scalar normalizes the total loss from all examples. + + Returns: + loss: A float32 scalar representing normalized total loss. + """ + positive_label_mask = targets == 1.0 + cross_entropy = F.binary_cross_entropy_with_logits(logits, targets.to(logits.dtype), reduction='none') + neg_logits = -1.0 * logits + modulator = torch.exp(gamma * targets * neg_logits - gamma * torch.log1p(torch.exp(neg_logits))) + + loss = modulator * cross_entropy + weighted_loss = torch.where(positive_label_mask, alpha * loss, (1.0 - alpha) * loss) + return weighted_loss / normalizer + + +def new_focal_loss(logits, targets, alpha: float, gamma: float, normalizer, label_smoothing: float = 0.01): + """Compute the focal loss between `logits` and the golden `target` values. + + 'New' is not the best descriptor, but this focal loss impl matches recent versions of + the official Tensorflow impl of EfficientDet. It has support for label smoothing, however + it is a bit slower, doesn't jit optimize well, and uses more memory. + + Focal loss = -(1-pt)^gamma * log(pt) + where pt is the probability of being classified to the true class. + Args: + logits: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + targets: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + alpha: A float32 scalar multiplying alpha to the loss from positive examples + and (1-alpha) to the loss from negative examples. + gamma: A float32 scalar modulating loss from hard and easy examples. + normalizer: Divide loss by this value. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. + Returns: + loss: A float32 scalar representing normalized total loss. + """ + # compute focal loss multipliers before label smoothing, such that it will not blow up the loss. 
+ pred_prob = logits.sigmoid() + targets = targets.to(logits.dtype) + onem_targets = 1. - targets + p_t = (targets * pred_prob) + (onem_targets * (1. - pred_prob)) + alpha_factor = targets * alpha + onem_targets * (1. - alpha) + modulating_factor = (1. - p_t) ** gamma + + # apply label smoothing for cross_entropy for each entry. + if label_smoothing > 0.: + targets = targets * (1. - label_smoothing) + .5 * label_smoothing + ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none') + + # compute the final loss and return + return (1 / normalizer) * alpha_factor * modulating_factor * ce + + +def huber_loss( + input, target, delta: float = 1., weights: Optional[torch.Tensor] = None, size_average: bool = True): + """ + """ + err = input - target + abs_err = err.abs() + quadratic = torch.clamp(abs_err, max=delta) + linear = abs_err - quadratic + loss = 0.5 * quadratic.pow(2) + delta * linear + if weights is not None: + loss *= weights + if size_average: + return loss.mean() + else: + return loss.sum() + + +def smooth_l1_loss( + input, target, beta: float = 1. / 9, weights: Optional[torch.Tensor] = None, size_average: bool = True): + """ + very similar to the smooth_l1_loss from pytorch, but with the extra beta parameter + """ + if beta < 1e-5: + # if beta == 0, then torch.where will result in nan gradients when + # the chain rule is applied due to pytorch implementation details + # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of + # zeros, rather than "no gradient"). To avoid this issue, we define + # small values of beta to be exactly l1 loss. + loss = torch.abs(input - target) + else: + err = torch.abs(input - target) + loss = torch.where(err < beta, 0.5 * err.pow(2) / beta, err - 0.5 * beta) + if weights is not None: + loss *= weights + if size_average: + return loss.mean() + else: + return loss.sum() + + +def _box_loss(box_outputs, box_targets, num_positives, delta: float = 0.1): + """Computes box regression loss.""" + # delta is typically around the mean value of regression target. + # for instances, the regression targets of 512x512 input with 6 anchors on + # P3-P7 pyramid is about [0.1, 0.1, 0.2, 0.2]. + normalizer = num_positives * 4.0 + mask = box_targets != 0.0 + box_loss = huber_loss(box_outputs, box_targets, weights=mask, delta=delta, size_average=False) + return box_loss / normalizer + + +def one_hot(x, num_classes: int): + # NOTE: PyTorch one-hot does not handle -ve entries (no hot) like Tensorflow, so mask them out + x_non_neg = (x >= 0).unsqueeze(-1) + onehot = torch.zeros(x.shape + (num_classes,), device=x.device, dtype=torch.float32) + return onehot.scatter(-1, x.unsqueeze(-1) * x_non_neg, 1) * x_non_neg + + +def loss_fn( + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + cls_targets: List[torch.Tensor], + box_targets: List[torch.Tensor], + num_positives: torch.Tensor, + num_classes: int, + alpha: float, + gamma: float, + delta: float, + box_loss_weight: float, + label_smoothing: float = 0., + new_focal: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Computes total detection loss. + Computes total detection loss including box and class loss from all levels. + Args: + cls_outputs: a List with values representing logits in [batch_size, height, width, num_anchors]. + at each feature level (index) + + box_outputs: a List with values representing box regression targets in + [batch_size, height, width, num_anchors * 4] at each feature level (index) + + cls_targets: groundtruth class targets. 
+ + box_targets: groundtrusth box targets. + + num_positives: num positive grountruth anchors + + Returns: + total_loss: an integer tensor representing total loss reducing from class and box losses from all levels. + + cls_loss: an integer tensor representing total class loss. + + box_loss: an integer tensor representing total box regression loss. + """ + # Sum all positives in a batch for normalization and avoid zero + # num_positives_sum, which would lead to inf loss during training + num_positives_sum = (num_positives.sum() + 1.0).float() + levels = len(cls_outputs) + + cls_losses = [] + box_losses = [] + for l in range(levels): + cls_targets_at_level = cls_targets[l] + box_targets_at_level = box_targets[l] + + # Onehot encoding for classification labels. + cls_targets_at_level_oh = one_hot(cls_targets_at_level, num_classes) + + bs, height, width, _, _ = cls_targets_at_level_oh.shape + cls_targets_at_level_oh = cls_targets_at_level_oh.view(bs, height, width, -1) + cls_outputs_at_level = cls_outputs[l].permute(0, 2, 3, 1).float() + if new_focal: + cls_loss = new_focal_loss( + cls_outputs_at_level, cls_targets_at_level_oh, + alpha=alpha, gamma=gamma, normalizer=num_positives_sum, label_smoothing=label_smoothing) + else: + cls_loss = focal_loss_legacy( + cls_outputs_at_level, cls_targets_at_level_oh, + alpha=alpha, gamma=gamma, normalizer=num_positives_sum) + cls_loss = cls_loss.view(bs, height, width, -1, num_classes) + cls_loss = cls_loss * (cls_targets_at_level != -2).unsqueeze(-1) + cls_losses.append(cls_loss.sum()) # FIXME reference code added a clamp here at some point ...clamp(0, 2)) + + box_losses.append(_box_loss( + box_outputs[l].permute(0, 2, 3, 1).float(), + box_targets_at_level, + num_positives_sum, + delta=delta)) + + # Sum per level losses to total loss. + cls_loss = torch.sum(torch.stack(cls_losses, dim=-1), dim=-1) + box_loss = torch.sum(torch.stack(box_losses, dim=-1), dim=-1) + total_loss = cls_loss + box_loss_weight * box_loss + return total_loss, cls_loss, box_loss + + +loss_jit = torch.jit.script(loss_fn) + + +class DetectionLoss(nn.Module): + + __constants__ = ['num_classes'] + + def __init__(self, config): + super(DetectionLoss, self).__init__() + self.config = config + self.num_classes = config.num_classes + self.alpha = config.alpha + self.gamma = config.gamma + self.delta = config.delta + self.box_loss_weight = config.box_loss_weight + self.label_smoothing = config.label_smoothing + self.new_focal = config.new_focal + self.use_jit = config.jit_loss + + def forward( + self, + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + cls_targets: List[torch.Tensor], + box_targets: List[torch.Tensor], + num_positives: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + + l_fn = loss_fn + if not torch.jit.is_scripting() and self.use_jit: + # This branch only active if parent / bench itself isn't being scripted + # NOTE: I haven't figured out what to do here wrt to tracing, is it an issue? 
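+            # use the torch.jit.script-compiled loss; it computes the same result while
+            # avoiding Python overhead in the per-level loop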
+ l_fn = loss_jit + + return l_fn( + cls_outputs, box_outputs, cls_targets, box_targets, num_positives, + num_classes=self.num_classes, alpha=self.alpha, gamma=self.gamma, delta=self.delta, + box_loss_weight=self.box_loss_weight, label_smoothing=self.label_smoothing, new_focal=self.new_focal) diff --git a/efficientdet/effdet/object_detection/README.md b/efficientdet/effdet/object_detection/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c2ed3902017e307fc5af61cdf3710ec02aa8c213 --- /dev/null +++ b/efficientdet/effdet/object_detection/README.md @@ -0,0 +1,3 @@ +# Tensorflow Object Detection + +All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection \ No newline at end of file diff --git a/efficientdet/effdet/object_detection/__init__.py b/efficientdet/effdet/object_detection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5679660c5c4006a824117c84f49f7b2e0e1c2703 --- /dev/null +++ b/efficientdet/effdet/object_detection/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Object detection data loaders and libraries are mostly based on RetinaNet: +# https://github.com/tensorflow/tpu/tree/master/models/official/retinanet +from .argmax_matcher import ArgMaxMatcher +from .box_coder import FasterRcnnBoxCoder +from .box_list import BoxList +from .matcher import Match +from .region_similarity_calculator import IouSimilarity +from .target_assigner import TargetAssigner diff --git a/efficientdet/effdet/object_detection/argmax_matcher.py b/efficientdet/effdet/object_detection/argmax_matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..3b98b7a90f986184d2148c80eaec02f7f112016c --- /dev/null +++ b/efficientdet/effdet/object_detection/argmax_matcher.py @@ -0,0 +1,174 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Argmax matcher implementation. + +This class takes a similarity matrix and matches columns to rows based on the +maximum value per column. 
One can specify matched_thresholds and +to prevent columns from matching to rows (generally resulting in a negative +training example) and unmatched_theshold to ignore the match (generally +resulting in neither a positive or negative training example). + +This matcher is used in Fast(er)-RCNN. + +Note: matchers are used in TargetAssigners. There is a create_target_assigner +factory function for popular implementations. +""" +import torch +from .matcher import Match +from typing import Optional + + +def one_hot_bool(x, num_classes: int): + # for improved perf over PyTorch builtin one_hot, scatter to bool + onehot = torch.zeros(x.size(0), num_classes, device=x.device, dtype=torch.bool) + return onehot.scatter_(1, x.unsqueeze(1), 1) + + +@torch.jit.script +class ArgMaxMatcher(object): # cannot inherit with torchscript + """Matcher based on highest value. + + This class computes matches from a similarity matrix. Each column is matched + to a single row. + + To support object detection target assignment this class enables setting both + matched_threshold (upper threshold) and unmatched_threshold (lower thresholds) + defining three categories of similarity which define whether examples are + positive, negative, or ignored: + (1) similarity >= matched_threshold: Highest similarity. Matched/Positive! + (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity. + Depending on negatives_lower_than_unmatched, this is either + Unmatched/Negative OR Ignore. + (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag + negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore. + For ignored matches this class sets the values in the Match object to -2. + """ + + def __init__(self, + matched_threshold: float, + unmatched_threshold: Optional[float] = None, + negatives_lower_than_unmatched: bool = True, + force_match_for_each_row: bool = False): + """Construct ArgMaxMatcher. + + Args: + matched_threshold: Threshold for positive matches. Positive if + sim >= matched_threshold, where sim is the maximum value of the + similarity matrix for a given column. Set to None for no threshold. + unmatched_threshold: Threshold for negative matches. Negative if + sim < unmatched_threshold. Defaults to matched_threshold + when set to None. + negatives_lower_than_unmatched: Boolean which defaults to True. If True + then negative matches are the ones below the unmatched_threshold, + whereas ignored matches are in between the matched and unmatched + threshold. If False, then negative matches are in between the matched + and unmatched threshold, and everything lower than unmatched is ignored. + force_match_for_each_row: If True, ensures that each row is matched to + at least one column (which is not guaranteed otherwise if the + matched_threshold is high). Defaults to False. See + argmax_matcher_test.testMatcherForceMatch() for an example. + + Raises: + ValueError: if unmatched_threshold is set but matched_threshold is not set + or if unmatched_threshold > matched_threshold. + """ + if (matched_threshold is None) and (unmatched_threshold is not None): + raise ValueError('Need to also define matched_threshold when unmatched_threshold is defined') + self._matched_threshold = matched_threshold + self._unmatched_threshold: float = 0. 
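+        # when no unmatched_threshold is given, fall back to matched_threshold so the
+        # "ignore" band between the two thresholds collapses to nothing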
+ if unmatched_threshold is None: + self._unmatched_threshold = matched_threshold + else: + if unmatched_threshold > matched_threshold: + raise ValueError('unmatched_threshold needs to be smaller or equal to matched_threshold') + self._unmatched_threshold = unmatched_threshold + if not negatives_lower_than_unmatched: + if self._unmatched_threshold == self._matched_threshold: + raise ValueError('When negatives are in between matched and unmatched thresholds, these ' + 'cannot be of equal value. matched: %s, unmatched: %s', + self._matched_threshold, self._unmatched_threshold) + self._force_match_for_each_row = force_match_for_each_row + self._negatives_lower_than_unmatched = negatives_lower_than_unmatched + + def _match_when_rows_are_empty(self, similarity_matrix): + """Performs matching when the rows of similarity matrix are empty. + + When the rows are empty, all detections are false positives. So we return + a tensor of -1's to indicate that the columns do not match to any rows. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + return -1 * torch.ones(similarity_matrix.shape[1], dtype=torch.long, device=similarity_matrix.device) + + def _match_when_rows_are_non_empty(self, similarity_matrix): + """Performs matching when the rows of similarity matrix are non empty. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + # Matches for each column + matched_vals, matches = torch.max(similarity_matrix, 0) + + # Deal with matched and unmatched threshold + if self._matched_threshold is not None: + # Get logical indices of ignored and unmatched columns as tf.int64 + below_unmatched_threshold = self._unmatched_threshold > matched_vals + between_thresholds = (matched_vals >= self._unmatched_threshold) & \ + (self._matched_threshold > matched_vals) + + if self._negatives_lower_than_unmatched: + matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -1) + matches = self._set_values_using_indicator(matches, between_thresholds, -2) + else: + matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -2) + matches = self._set_values_using_indicator(matches, between_thresholds, -1) + + if self._force_match_for_each_row: + force_match_column_ids = torch.argmax(similarity_matrix, 1) + force_match_column_indicators = one_hot_bool(force_match_column_ids, similarity_matrix.shape[1]) + force_match_column_mask, force_match_row_ids = torch.max(force_match_column_indicators, 0) + final_matches = torch.where(force_match_column_mask, force_match_row_ids, matches) + return final_matches + else: + return matches + + def match(self, similarity_matrix): + """Tries to match each column of the similarity matrix to a row. + + Args: + similarity_matrix: tensor of shape [N, M] representing any similarity metric. + + Returns: + Match object with corresponding matches for each of M columns. + """ + if similarity_matrix.shape[0] == 0: + return Match(self._match_when_rows_are_empty(similarity_matrix)) + else: + return Match(self._match_when_rows_are_non_empty(similarity_matrix)) + + def _set_values_using_indicator(self, x, indicator, val: int): + """Set the indicated fields of x to val. + + Args: + x: tensor. + indicator: boolean with same shape as x. + val: scalar with value to set. + + Returns: + modified tensor. 
+ """ + indicator = indicator.to(dtype=x.dtype) + return x * (1 - indicator) + val * indicator diff --git a/efficientdet/effdet/object_detection/box_coder.py b/efficientdet/effdet/object_detection/box_coder.py new file mode 100644 index 0000000000000000000000000000000000000000..cfdccd76f4232804d77bf94c9fb82de5c66d0b48 --- /dev/null +++ b/efficientdet/effdet/object_detection/box_coder.py @@ -0,0 +1,172 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base box coder. + +Box coders convert between coordinate frames, namely image-centric +(with (0,0) on the top left of image) and anchor-centric (with (0,0) being +defined by a specific anchor). + +Users of a BoxCoder can call two methods: + encode: which encodes a box with respect to a given anchor + (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and + decode: which inverts this encoding with a decode operation. +In both cases, the arguments are assumed to be in 1-1 correspondence already; +it is not the job of a BoxCoder to perform matching. +""" +import torch +from typing import List, Optional +from .box_list import BoxList + +# Box coder types. +FASTER_RCNN = 'faster_rcnn' +KEYPOINT = 'keypoint' +MEAN_STDDEV = 'mean_stddev' +SQUARE = 'square' + + +"""Faster RCNN box coder. + +Faster RCNN box coder follows the coding schema described below: + ty = (y - ya) / ha + tx = (x - xa) / wa + th = log(h / ha) + tw = log(w / wa) + where x, y, w, h denote the box's center coordinates, width and height + respectively. Similarly, xa, ya, wa, ha denote the anchor's center + coordinates, width and height. tx, ty, tw and th denote the anchor-encoded + center, width and height respectively. + + See http://arxiv.org/abs/1506.01497 for details. +""" + + +EPS = 1e-8 + + +#@torch.jit.script +class FasterRcnnBoxCoder(object): + """Faster RCNN box coder.""" + + def __init__(self, scale_factors: Optional[List[float]] = None, eps: float = EPS): + """Constructor for FasterRcnnBoxCoder. + + Args: + scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. + If set to None, does not perform scaling. For Faster RCNN, + the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. + """ + self._scale_factors = scale_factors + if scale_factors is not None: + assert len(scale_factors) == 4 + for scalar in scale_factors: + assert scalar > 0 + self.eps = eps + + #@property + def code_size(self): + return 4 + + def encode(self, boxes: BoxList, anchors: BoxList): + """Encode a box collection with respect to anchor collection. + + Args: + boxes: BoxList holding N boxes to be encoded. + anchors: BoxList of anchors. + + Returns: + a tensor representing N anchor-encoded boxes of the format [ty, tx, th, tw]. + """ + # Convert anchors to the center coordinate representation. 
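+        # targets follow the Faster R-CNN parameterisation from the module docstring:
+        #   ty = (y - ya) / ha,  tx = (x - xa) / wa,  th = log(h / ha),  tw = log(w / wa)
+        # eps keeps widths and heights strictly positive so the divisions and logs stay finite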
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() + # Avoid NaN in division and log below. + ha += self.eps + wa += self.eps + h += self.eps + w += self.eps + + tx = (xcenter - xcenter_a) / wa + ty = (ycenter - ycenter_a) / ha + tw = torch.log(w / wa) + th = torch.log(h / ha) + # Scales location targets as used in paper for joint training. + if self._scale_factors is not None: + ty *= self._scale_factors[0] + tx *= self._scale_factors[1] + th *= self._scale_factors[2] + tw *= self._scale_factors[3] + return torch.stack([ty, tx, th, tw]).t() + + def decode(self, rel_codes, anchors: BoxList): + """Decode relative codes to boxes. + + Args: + rel_codes: a tensor representing N anchor-encoded boxes. + anchors: BoxList of anchors. + + Returns: + boxes: BoxList holding N bounding boxes. + """ + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + + ty, tx, th, tw = rel_codes.t().unbind() + if self._scale_factors is not None: + ty /= self._scale_factors[0] + tx /= self._scale_factors[1] + th /= self._scale_factors[2] + tw /= self._scale_factors[3] + w = torch.exp(tw) * wa + h = torch.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. + return BoxList(torch.stack([ymin, xmin, ymax, xmax]).t()) + + +def batch_decode(encoded_boxes, box_coder: FasterRcnnBoxCoder, anchors: BoxList): + """Decode a batch of encoded boxes. + + This op takes a batch of encoded bounding boxes and transforms + them to a batch of bounding boxes specified by their corners in + the order of [y_min, x_min, y_max, x_max]. + + Args: + encoded_boxes: a float32 tensor of shape [batch_size, num_anchors, + code_size] representing the location of the objects. + box_coder: a BoxCoder object. + anchors: a BoxList of anchors used to encode `encoded_boxes`. + + Returns: + decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, coder_size] + representing the corners of the objects in the order of [y_min, x_min, y_max, x_max]. + + Raises: + ValueError: if batch sizes of the inputs are inconsistent, or if + the number of anchors inferred from encoded_boxes and anchors are inconsistent. + """ + assert len(encoded_boxes.shape) == 3 + if encoded_boxes.shape[1] != anchors.num_boxes(): + raise ValueError('The number of anchors inferred from encoded_boxes' + ' and anchors are inconsistent: shape[1] of encoded_boxes' + ' %s should be equal to the number of anchors: %s.' % + (encoded_boxes.shape[1], anchors.num_boxes())) + + decoded_boxes = torch.stack([ + box_coder.decode(boxes, anchors).boxes for boxes in encoded_boxes.unbind() + ]) + return decoded_boxes diff --git a/efficientdet/effdet/object_detection/box_list.py b/efficientdet/effdet/object_detection/box_list.py new file mode 100644 index 0000000000000000000000000000000000000000..09b77f3d7aa6a8a97728e13b0bd6d108acec0603 --- /dev/null +++ b/efficientdet/effdet/object_detection/box_list.py @@ -0,0 +1,197 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Bounding Box List definition. + +BoxList represents a list of bounding boxes as tensorflow +tensors, where each bounding box is represented as a row of 4 numbers, +[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes +within a given list correspond to a single image. See also +box_list.py for common box related operations (such as area, iou, etc). + +Optionally, users can add additional related fields (such as weights). +We assume the following things to be true about fields: +* they correspond to boxes in the box_list along the 0th dimension +* they have inferable rank at graph construction time +* all dimensions except for possibly the 0th can be inferred + (i.e., not None) at graph construction time. + +Some other notes: + * Following tensorflow conventions, we use height, width ordering, + and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering + * Tensors are always provided as (flat) [N, 4] tensors. +""" +import torch +from typing import Optional, List, Dict + + +@torch.jit.script +class BoxList(object): + """Box collection.""" + data: Dict[str, torch.Tensor] + + def __init__(self, boxes): + """Constructs box collection. + + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data or if bbox data is not in float32 format. + """ + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError('Invalid dimensions for box data.') + if boxes.dtype != torch.float32: + raise ValueError('Invalid tensor type: should be tf.float32') + self.data = {'boxes': boxes} + + def num_boxes(self): + """Returns number of boxes held in collection. + + Returns: + a tensor representing the number of boxes held in the collection. + """ + return self.data['boxes'].shape[0] + + def get_all_fields(self): + """Returns all fields.""" + return self.data.keys() + + def get_extra_fields(self): + """Returns all non-box fields (i.e., everything not named 'boxes').""" + # return [k for k in self.data.keys() if k != 'boxes'] # FIXME torscript doesn't support comprehensions yet + extra: List[str] = [] + for k in self.data.keys(): + if k != 'boxes': + extra.append(k) + return extra + + def add_field(self, field: str, field_data: torch.Tensor): + """Add field to box list. + + This method can be used to add related box data such as weights/labels, etc. + + Args: + field: a string key to access the data via `get` + field_data: a tensor containing the data to store in the BoxList + """ + self.data[field] = field_data + + def has_field(self, field: str): + return field in self.data + + #@property # FIXME for torchscript compat + def boxes(self): + """Convenience function for accessing box coordinates. + + Returns: + a tensor with shape [N, 4] representing box coordinates. + """ + return self.get_field('boxes') + + #@boxes.setter # FIXME for torchscript compat + def set_boxes(self, boxes): + """Convenience function for setting box coordinates. 
+ + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data + """ + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError('Invalid dimensions for box data.') + self.data['boxes'] = boxes + + def get_field(self, field: str): + """Accesses a box collection and associated fields. + + This function returns specified field with object; if no field is specified, + it returns the box coordinates. + + Args: + field: this optional string parameter can be used to specify a related field to be accessed. + + Returns: + a tensor representing the box collection or an associated field. + + Raises: + ValueError: if invalid field + """ + if not self.has_field(field): + raise ValueError(f'field {field} does not exist') + return self.data[field] + + def set_field(self, field: str, value: torch.Tensor): + """Sets the value of a field. + + Updates the field of a box_list with a given value. + + Args: + field: (string) name of the field to set value. + value: the value to assign to the field. + + Raises: + ValueError: if the box_list does not have specified field. + """ + if not self.has_field(field): + raise ValueError(f'field {field} does not exist') + self.data[field] = value + + def get_center_coordinates_and_sizes(self): + """Computes the center coordinates, height and width of the boxes. + + Returns: + a list of 4 1-D tensors [ycenter, xcenter, height, width]. + """ + box_corners = self.boxes() + ymin, xmin, ymax, xmax = box_corners.t().unbind() + width = xmax - xmin + height = ymax - ymin + ycenter = ymin + height / 2. + xcenter = xmin + width / 2. + return [ycenter, xcenter, height, width] + + def transpose_coordinates(self): + """Transpose the coordinate representation in a boxlist. + + """ + y_min, x_min, y_max, x_max = self.boxes().chunk(4, dim=1) + self.set_boxes(torch.cat([x_min, y_min, x_max, y_max], 1)) + + def as_tensor_dict(self, fields: Optional[List[str]] = None): + """Retrieves specified fields as a dictionary of tensors. + + Args: + fields: (optional) list of fields to return in the dictionary. + If None (default), all fields are returned. + + Returns: + tensor_dict: A dictionary of tensors specified by fields. + + Raises: + ValueError: if specified field is not contained in boxlist. + """ + tensor_dict = {} + if fields is None: + fields = self.get_all_fields() + for field in fields: + if not self.has_field(field): + raise ValueError('boxlist must contain all specified fields') + tensor_dict[field] = self.get_field(field) + return tensor_dict + + #@property + def device(self): + return self.data['boxes'].device diff --git a/efficientdet/effdet/object_detection/matcher.py b/efficientdet/effdet/object_detection/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..22aaab118d2bf7d65f4bb403c0cfd657ec74741c --- /dev/null +++ b/efficientdet/effdet/object_detection/matcher.py @@ -0,0 +1,179 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Matcher interface and Match class. + +This module defines the Matcher interface and the Match object. The job of the +matcher is to match row and column indices based on the similarity matrix and +other optional parameters. Each column is matched to at most one row. There +are three possibilities for the matching: + +1) match: A column matches a row. +2) no_match: A column does not match any row. +3) ignore: A column that is neither 'match' nor no_match. + +The ignore case is regularly encountered in object detection: when an anchor has +a relatively small overlap with a ground-truth box, one neither wants to +consider this box a positive example (match) nor a negative example (no match). + +The Match class is used to store the match results and it provides simple apis +to query the results. +""" +import torch + + +@torch.jit.script +class Match(object): + """Class to store results from the matcher. + + This class is used to store the results from the matcher. It provides + convenient methods to query the matching results. + """ + + def __init__(self, match_results: torch.Tensor): + """Constructs a Match object. + + Args: + match_results: Integer tensor of shape [N] with (1) match_results[i]>=0, + meaning that column i is matched with row match_results[i]. + (2) match_results[i]=-1, meaning that column i is not matched. + (3) match_results[i]=-2, meaning that column i is ignored. + + Raises: + ValueError: if match_results does not have rank 1 or is not an integer int32 scalar tensor + """ + if len(match_results.shape) != 1: + raise ValueError('match_results should have rank 1') + if match_results.dtype not in (torch.int32, torch.int64): + raise ValueError('match_results should be an int32 or int64 scalar tensor') + self.match_results = match_results + + def matched_column_indices(self): + """Returns column indices that match to some row. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.match_results > -1).flatten().long() + + def matched_column_indicator(self): + """Returns column indices that are matched. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return self.match_results >= 0 + + def num_matched_columns(self): + """Returns number (int32 scalar tensor) of matched columns.""" + return self.matched_column_indices().numel() + + def unmatched_column_indices(self): + """Returns column indices that do not match any row. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.match_results == -1).flatten().long() + + def unmatched_column_indicator(self): + """Returns column indices that are unmatched. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return self.match_results == -1 + + def num_unmatched_columns(self): + """Returns number (int32 scalar tensor) of unmatched columns.""" + return self.unmatched_column_indices().numel() + + def ignored_column_indices(self): + """Returns column indices that are ignored (neither Matched nor Unmatched). + + The indices returned by this op are always sorted in increasing order. 
+ + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.ignored_column_indicator()).flatten().long() + + def ignored_column_indicator(self): + """Returns boolean column indicator where True means the column is ignored. + + Returns: + column_indicator: boolean vector which is True for all ignored column indices. + """ + return self.match_results == -2 + + def num_ignored_columns(self): + """Returns number (int32 scalar tensor) of matched columns.""" + return self.ignored_column_indices().numel() + + def unmatched_or_ignored_column_indices(self): + """Returns column indices that are unmatched or ignored. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(0 > self.match_results).flatten().long() + + def matched_row_indices(self): + """Returns row indices that match some column. + + The indices returned by this op are ordered so as to be in correspondence with the output of + matched_column_indicator(). For example if self.matched_column_indicator() is [0,2], + and self.matched_row_indices() is [7, 3], then we know that column 0 was matched to row 7 and + column 2 was matched to row 3. + + Returns: + row_indices: int32 tensor of shape [K] with row indices. + """ + return torch.gather(self.match_results, 0, self.matched_column_indices()).flatten().long() + + def gather_based_on_match(self, input_tensor, unmatched_value, ignored_value): + """Gathers elements from `input_tensor` based on match results. + + For columns that are matched to a row, gathered_tensor[col] is set to input_tensor[match_results[col]]. + For columns that are unmatched, gathered_tensor[col] is set to unmatched_value. Finally, for columns that + are ignored gathered_tensor[col] is set to ignored_value. + + Note that the input_tensor.shape[1:] must match with unmatched_value.shape + and ignored_value.shape + + Args: + input_tensor: Tensor to gather values from. + unmatched_value: Constant tensor or python scalar value for unmatched columns. + ignored_value: Constant tensor or python scalar for ignored columns. + + Returns: + gathered_tensor: A tensor containing values gathered from input_tensor. + The shape of the gathered tensor is [match_results.shape[0]] + input_tensor.shape[1:]. + """ + if isinstance(ignored_value, torch.Tensor): + input_tensor = torch.cat([ignored_value, unmatched_value, input_tensor], dim=0) + else: + # scalars + input_tensor = torch.cat([ + torch.tensor([ignored_value, unmatched_value], dtype=input_tensor.dtype, device=input_tensor.device), + input_tensor], dim=0) + gather_indices = torch.clamp(self.match_results + 2, min=0) + gathered_tensor = torch.index_select(input_tensor, 0, gather_indices) + return gathered_tensor diff --git a/efficientdet/effdet/object_detection/region_similarity_calculator.py b/efficientdet/effdet/object_detection/region_similarity_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..f6945bc7576f68ee0c5b716b52feef7340d2804c --- /dev/null +++ b/efficientdet/effdet/object_detection/region_similarity_calculator.py @@ -0,0 +1,101 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Region Similarity Calculators for BoxLists. + +Region Similarity Calculators compare a pairwise measure of similarity +between the boxes in two BoxLists. +""" +import torch +from .box_list import BoxList + + +def area(boxlist: BoxList): + """Computes area of boxes. + + Args: + boxlist: BoxList holding N boxes + + Returns: + a tensor with shape [N] representing box areas. + """ + y_min, x_min, y_max, x_max = boxlist.boxes().chunk(4, dim=1) + out = (y_max - y_min).squeeze(1) * (x_max - x_min).squeeze(1) + return out + + +def intersection(boxlist1: BoxList, boxlist2: BoxList): + """Compute pairwise intersection areas between boxes. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a tensor with shape [N, M] representing pairwise intersections + """ + y_min1, x_min1, y_max1, x_max1 = boxlist1.boxes().chunk(4, dim=1) + y_min2, x_min2, y_max2, x_max2 = boxlist2.boxes().chunk(4, dim=1) + all_pairs_min_ymax = torch.min(y_max1, y_max2.t()) + all_pairs_max_ymin = torch.max(y_min1, y_min2.t()) + intersect_heights = torch.clamp(all_pairs_min_ymax - all_pairs_max_ymin, min=0) + all_pairs_min_xmax = torch.min(x_max1, x_max2.t()) + all_pairs_max_xmin = torch.max(x_min1, x_min2.t()) + intersect_widths = torch.clamp(all_pairs_min_xmax - all_pairs_max_xmin, min=0) + return intersect_heights * intersect_widths + + +def iou(boxlist1: BoxList, boxlist2: BoxList): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a tensor with shape [N, M] representing pairwise iou scores. + """ + intersections = intersection(boxlist1, boxlist2) + areas1 = area(boxlist1) + areas2 = area(boxlist2) + unions = areas1.unsqueeze(1) + areas2.unsqueeze(0) - intersections + return torch.where(intersections == 0.0, torch.zeros_like(intersections), intersections / unions) + + +@torch.jit.script +class IouSimilarity(object): + """Class to compute similarity based on Intersection over Union (IOU) metric. + + This class computes pairwise similarity between two BoxLists based on IOU. + """ + def __init__(self): + pass + + def compare(self, boxlist1: BoxList, boxlist2: BoxList): + """Computes matrix of pairwise similarity between BoxLists. + + This op (to be overridden) computes a measure of pairwise similarity between + the boxes in the given BoxLists. Higher values indicate more similarity. + + Note that this method simply measures similarity and does not explicitly + perform a matching. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + + Returns: + a (float32) tensor of shape [N, M] with pairwise similarity score. 
+ """ + return iou(boxlist1, boxlist2) diff --git a/efficientdet/effdet/object_detection/target_assigner.py b/efficientdet/effdet/object_detection/target_assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..6b97a4e72827d5295ad4e349601c43c1385031ca --- /dev/null +++ b/efficientdet/effdet/object_detection/target_assigner.py @@ -0,0 +1,266 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base target assigner module. + +The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and +groundtruth detections (bounding boxes), to assign classification and regression +targets to each anchor as well as weights to each anchor (specifying, e.g., +which anchors should not contribute to training loss). + +It assigns classification/regression targets by performing the following steps: +1) Computing pairwise similarity between anchors and groundtruth boxes using a + provided RegionSimilarity Calculator +2) Computing a matching based on the similarity matrix using a provided Matcher +3) Assigning regression targets based on the matching and a provided BoxCoder +4) Assigning classification targets based on the matching and groundtruth labels + +Note that TargetAssigners only operate on detections from a single +image at a time, so any logic for applying a TargetAssigner to multiple +images must be handled externally. +""" +import torch +from typing import Optional + +from . import box_list +from .region_similarity_calculator import IouSimilarity +from .argmax_matcher import ArgMaxMatcher +from .matcher import Match +from .box_list import BoxList +from .box_coder import FasterRcnnBoxCoder + +KEYPOINTS_FIELD_NAME = 'keypoints' + + +#@torch.jit.script +class TargetAssigner(object): + """Target assigner to compute classification and regression targets.""" + + def __init__(self, similarity_calc: IouSimilarity, matcher: ArgMaxMatcher, box_coder: FasterRcnnBoxCoder, + negative_class_weight: float = 1.0, unmatched_cls_target: Optional[float] = None, + keypoints_field_name: str = KEYPOINTS_FIELD_NAME): + """Construct Object Detection Target Assigner. + + Args: + similarity_calc: a RegionSimilarityCalculator + + matcher: Matcher used to match groundtruth to anchors. + + box_coder: BoxCoder used to encode matching groundtruth boxes with respect to anchors. + + negative_class_weight: classification weight to be associated to negative + anchors (default: 1.0). The weight must be in [0., 1.]. + + unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). + If set to None, unmatched_cls_target is set to be [0] for each anchor. 
+ + Raises: + ValueError: if similarity_calc is not a RegionSimilarityCalculator or + if matcher is not a Matcher or if box_coder is not a BoxCoder + """ + self._similarity_calc = similarity_calc + self._matcher = matcher + self._box_coder = box_coder + self._negative_class_weight = negative_class_weight + if unmatched_cls_target is not None: + self._unmatched_cls_target = unmatched_cls_target + else: + self._unmatched_cls_target = 0. + self._keypoints_field_name = keypoints_field_name + + def assign(self, anchors: BoxList, groundtruth_boxes: BoxList, groundtruth_labels=None, groundtruth_weights=None): + """Assign classification and regression targets to each anchor. + + For a given set of anchors and groundtruth detections, match anchors + to groundtruth_boxes and assign classification and regression targets to + each anchor as well as weights based on the resulting match (specifying, + e.g., which anchors should not contribute to training loss). + + Anchors that are not matched to anything are given a classification target + of self._unmatched_cls_target which can be specified via the constructor. + + Args: + anchors: a BoxList representing N anchors + + groundtruth_boxes: a BoxList representing M groundtruth boxes + + groundtruth_labels: a tensor of shape [M, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set + to None, groundtruth_labels assumes a binary problem where all + ground_truth boxes get a positive label (of 1). + + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. The weights + must be in [0., 1.]. If None, all weights are set to 1. + + **params: Additional keyword arguments for specific implementations of the Matcher. + + Returns: + cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], + where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels + which has shape [num_gt_boxes, d_1, d_2, ... d_k]. + + cls_weights: a float32 tensor with shape [num_anchors] + + reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] + + reg_weights: a float32 tensor with shape [num_anchors] + + match: a matcher.Match object encoding the match between anchors and groundtruth boxes, + with rows corresponding to groundtruth boxes and columns corresponding to anchors. 
+ + Raises: + ValueError: if anchors or groundtruth_boxes are not of type box_list.BoxList + """ + if not isinstance(anchors, box_list.BoxList): + raise ValueError('anchors must be an BoxList') + if not isinstance(groundtruth_boxes, box_list.BoxList): + raise ValueError('groundtruth_boxes must be an BoxList') + + # device = anchors.device() + # if groundtruth_labels is None: + # groundtruth_labels = torch.ones(groundtruth_boxes.num_boxes(), device=device).unsqueeze(0) + # groundtruth_labels = groundtruth_labels.unsqueeze(-1) + # if groundtruth_weights is None: + # num_gt_boxes = groundtruth_boxes.num_boxes() + # if not num_gt_boxes: + # num_gt_boxes = groundtruth_boxes.num_boxes() + # groundtruth_weights = torch.ones([num_gt_boxes], device=device) + + match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, anchors) + match = self._matcher.match(match_quality_matrix) + reg_targets = self._create_regression_targets(anchors, groundtruth_boxes, match) + cls_targets = self._create_classification_targets(groundtruth_labels, match) + #reg_weights = self._create_regression_weights(match, groundtruth_weights) + #cls_weights = self._create_classification_weights(match, groundtruth_weights) + + return cls_targets, reg_targets, match + + def _create_regression_targets(self, anchors: BoxList, groundtruth_boxes: BoxList, match: Match): + """Returns a regression target for each anchor. + + Args: + anchors: a BoxList representing N anchors + + groundtruth_boxes: a BoxList representing M groundtruth_boxes + + match: a matcher.Match object + + Returns: + reg_targets: a float32 tensor with shape [N, box_code_dimension] + """ + device = anchors.device() + zero_box = torch.zeros((1, 4), device=device) + matched_gt_boxes = match.gather_based_on_match( + groundtruth_boxes.boxes(), unmatched_value=zero_box, ignored_value=zero_box) + matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) + if groundtruth_boxes.has_field(self._keypoints_field_name): + groundtruth_keypoints = groundtruth_boxes.get_field(self._keypoints_field_name) + zero_kp = torch.zeros((1,) + groundtruth_keypoints.shape[1:], device=device) + matched_keypoints = match.gather_based_on_match( + groundtruth_keypoints, unmatched_value=zero_kp, ignored_value=zero_kp) + matched_gt_boxlist.add_field(self._keypoints_field_name, matched_keypoints) + matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors) + + unmatched_ignored_reg_targets = self._default_regression_target(device).repeat(match.match_results.shape[0], 1) + + matched_anchors_mask = match.matched_column_indicator() + reg_targets = torch.where(matched_anchors_mask.unsqueeze(1), matched_reg_targets, unmatched_ignored_reg_targets) + return reg_targets + + def _default_regression_target(self, device: torch.device): + """Returns the default target for anchors to regress to. + + Default regression targets are set to zero (though in this implementation what + these targets are set to should not matter as the regression weight of any box + set to regress to the default target is zero). + + Returns: + default_target: a float32 tensor with shape [1, box_code_dimension] + """ + return torch.zeros(1, self._box_coder.code_size(), device=device) + + def _create_classification_targets(self, groundtruth_labels, match: Match): + """Create classification targets for each anchor. + + Assign a classification target of for each anchor to the matching + groundtruth label that is provided by match. 
Anchors that are not matched + to anything are given the target self._unmatched_cls_target + + Args: + groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar labels). + match: a matcher.Match object that provides a matching between anchors + and groundtruth boxes. + + Returns: + a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the + subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has + shape [num_gt_boxes, d_1, d_2, ... d_k]. + """ + return match.gather_based_on_match( + groundtruth_labels, + unmatched_value=self._unmatched_cls_target, ignored_value=self._unmatched_cls_target) + + def _create_regression_weights(self, match: Match, groundtruth_weights): + """Set regression weight for each anchor. + + Only positive anchors are set to contribute to the regression loss, so this + method returns a weight of 1 for every positive anchor and 0 for every + negative anchor. + + Args: + match: a matcher.Match object that provides a matching between anchors and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing regression weights. + """ + return match.gather_based_on_match(groundtruth_weights, ignored_value=0., unmatched_value=0.) + + def _create_classification_weights(self, match: Match, groundtruth_weights): + """Create classification weights for each anchor. + + Positive (matched) anchors are associated with a weight of + positive_class_weight and negative (unmatched) anchors are associated with + a weight of negative_class_weight. When anchors are ignored, weights are set + to zero. By default, both positive/negative weights are set to 1.0, + but they can be adjusted to handle class imbalance (which is almost always + the case in object detection). + + Args: + match: a matcher.Match object that provides a matching between anchors and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing classification weights. + """ + return match.gather_based_on_match( + groundtruth_weights, ignored_value=0., unmatched_value=self._negative_class_weight) + + def box_coder(self): + """Get BoxCoder of this TargetAssigner. + + Returns: + BoxCoder object. + """ + return self._box_coder diff --git a/efficientdet/effdet/soft_nms.py b/efficientdet/effdet/soft_nms.py new file mode 100644 index 0000000000000000000000000000000000000000..fff0158e3e17b499053c5b1bd9c27581835bbc80 --- /dev/null +++ b/efficientdet/effdet/soft_nms.py @@ -0,0 +1,170 @@ +""" PyTorch Soft-NMS + +This code was adapted from a PR for detectron2 submitted by https://github.com/alekseynp +https://github.com/facebookresearch/detectron2/pull/1183/files + +Detectron2 is licensed Apache 2.0, Copyright Facebook Inc. +""" +import torch +from typing import List + + +def pairwise_iou(boxes1, boxes2) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the IoU (intersection over union) + between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + Returns: + Tensor: IoU, sized [N,M]. 
+ """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) # [N,] + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) # [M,] + + width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( + boxes1[:, None, :2], boxes2[:, :2] + ) # [N,M,2] + + width_height.clamp_(min=0) # [N,M,2] + inter = width_height.prod(dim=2) # [N,M] + + # handle empty boxes + iou = torch.where( + inter > 0, + inter / (area1[:, None] + area2 - inter), + torch.zeros(1, dtype=inter.dtype, device=inter.device), + ) + return iou + + +def soft_nms( + boxes, + scores, + method_gaussian: bool = True, + sigma: float = 0.5, + iou_threshold: float = .5, + score_threshold: float = 0.005 +): + """ + Soft non-max suppression algorithm. + + Implementation of [Soft-NMS -- Improving Object Detection With One Line of Codec] + (https://arxiv.org/abs/1704.04503) + + Args: + boxes_remain (Tensor[N, ?]): + boxes where NMS will be performed + if Boxes, in (x1, y1, x2, y2) format + if RotatedBoxes, in (x_ctr, y_ctr, width, height, angle_degrees) format + scores_remain (Tensor[N]): + scores for each one of the boxes + method_gaussian (bool): use gaussian method if True, otherwise linear + sigma (float): + parameter for Gaussian penalty function + iou_threshold (float): + iou threshold for applying linear decay. Nt from the paper + re-used as threshold for standard "hard" nms + score_threshold (float): + boxes with scores below this threshold are pruned at each iteration. + Dramatically reduces computation time. Authors use values in [10e-4, 10e-2] + + Returns: + tuple(Tensor, Tensor): + [0]: int64 tensor with the indices of the elements that have been kept + by Soft NMS, sorted in decreasing order of scores + [1]: float tensor with the re-scored scores of the elements that were kept + """ + device = boxes.device + boxes_remain = boxes.clone() + scores_remain = scores.clone() + num_elem = scores_remain.size()[0] + idxs = torch.arange(num_elem) + idxs_out = torch.zeros(num_elem, dtype=torch.int64, device=device) + scores_out = torch.zeros(num_elem, dtype=torch.float32, device=device) + count: int = 0 + + while scores_remain.numel() > 0: + top_idx = torch.argmax(scores_remain) + idxs_out[count] = idxs[top_idx] + scores_out[count] = scores_remain[top_idx] + count += 1 + + top_box = boxes_remain[top_idx] + ious = pairwise_iou(top_box.unsqueeze(0), boxes_remain)[0] + + if method_gaussian: + decay = torch.exp(-torch.pow(ious, 2) / sigma) + else: + decay = torch.ones_like(ious) + decay_mask = ious > iou_threshold + decay[decay_mask] = 1 - ious[decay_mask] + + scores_remain *= decay + keep = scores_remain > score_threshold + keep[top_idx] = torch.tensor(False, device=device) + + boxes_remain = boxes_remain[keep] + scores_remain = scores_remain[keep] + idxs = idxs[keep] + + return idxs_out[:count], scores_out[:count] + + +def batched_soft_nms( + boxes, scores, idxs, + method_gaussian: bool = True, + sigma: float = 0.5, + iou_threshold: float = .5, + score_threshold: float = 0.001): + + """ + Performs soft non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 4]): + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + method (str): + one of ['gaussian', 'linear', 'hard'] + see paper for details. 
users encouraged not to use "hard", as this is the + same nms available elsewhere in detectron2 + sigma (float): + parameter for Gaussian penalty function + iou_threshold (float): + iou threshold for applying linear decay. Nt from the paper + re-used as threshold for standard "hard" nms + score_threshold (float): + boxes with scores below this threshold are pruned at each iteration. + Dramatically reduces computation time. Authors use values in [10e-4, 10e-2] + Returns: + tuple(Tensor, Tensor): + [0]: int64 tensor with the indices of the elements that have been kept + by Soft NMS, sorted in decreasing order of scores + [1]: float tensor with the re-scored scores of the elements that were kept + """ + if boxes.numel() == 0: + return ( + torch.empty((0,), dtype=torch.int64, device=boxes.device), + torch.empty((0,), dtype=torch.float32, device=scores.device), + ) + # strategy: in order to perform NMS independently per class. + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + return soft_nms( + boxes_for_nms, scores, method_gaussian=method_gaussian, sigma=sigma, + iou_threshold=iou_threshold, score_threshold=score_threshold + ) + diff --git a/efficientdet/effdet/version.py b/efficientdet/effdet/version.py new file mode 100644 index 0000000000000000000000000000000000000000..020ed73d7a09b032ea1b3291090cbbdeee5a181a --- /dev/null +++ b/efficientdet/effdet/version.py @@ -0,0 +1 @@ +__version__ = '0.2.2' diff --git a/efficientdet/efficientdet.py b/efficientdet/efficientdet.py new file mode 100755 index 0000000000000000000000000000000000000000..4d61c3db9747e2cfbf8c9b8831427f610c56c9e9 --- /dev/null +++ b/efficientdet/efficientdet.py @@ -0,0 +1,268 @@ +''' +Efficientdet demo +''' +import argparse +import cv2 +import os +import time + +from PIL import Image +import PIL.ImageColor as ImageColor +import requests +import matplotlib.pyplot as plt + +import torch +import torchvision.transforms as T +from tqdm import tqdm + +from effdet import create_model + + +def get_args_parser(): + parser = argparse.ArgumentParser( + 'Test detr on one image') + parser.add_argument( + '--img', metavar='IMG', + help='path to image, could be url', + default='https://www.fyidenmark.com/images/denmark-litter.jpg') + parser.add_argument( + '--save', metavar='OUTPUT', + help='path to save image with predictions (if None show image)', + default=None) + parser.add_argument('--classes', nargs='+', default=['Litter']) + parser.add_argument( + '--checkpoint', type=str, + help='path to checkpoint') + parser.add_argument( + '--device', type=str, default='cpu', + help='device to evaluate model (default: cpu)') + parser.add_argument( + '--prob_threshold', type=float, default=0.3, + help='probability threshold to show results (default: 0.5)') + parser.add_argument( + '--video', action='store_true', default=False, + help="If true, we treat impute as video (default: False)") + parser.set_defaults(redundant_bias=None) + return parser + + +# standard PyTorch mean-std input image normalization +def get_transforms(im, size=768): + transform = T.Compose([ + T.Resize((size, size)), + T.ToTensor(), + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + return transform(im).unsqueeze(0) + + +def rescale_bboxes(out_bbox, size, resize): + img_w, img_h = size + out_w, out_h = resize + b = out_bbox * 
torch.tensor([img_w/out_w, img_h/out_h, + img_w/out_w, img_h/out_h], + dtype=torch.float32).to( + out_bbox.device) + return b + + +# from https://deepdrive.pl/ +def get_output(img, prob, boxes, classes=['Litter'], stat_text=None): + # colors for visualization + STANDARD_COLORS = [ + 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', + 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', + 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', + 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', + 'DarkKhaki', 'DarkOrange', 'DarkOrchid', 'DarkSalmon', + 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', + 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', + 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', + 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', + 'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', + 'LawnGreen', 'LemonChiffon', 'LightBlue', + 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', + 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', + 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', + 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta', + 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', + 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', + 'MediumTurquoise', 'MediumVioletRed', 'MintCream', + 'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', + 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', + 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', + 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', + 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', + 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', + 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', + 'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', + 'Wheat', 'White', 'WhiteSmoke', 'Yellow', 'YellowGreen' + ] + palette = [ImageColor.getrgb(_) for _ in STANDARD_COLORS] + for p, (x0, y0, x1, y1) in zip(prob, boxes.tolist()): + cl = int(p[1] - 1) + color = palette[cl] + start_p, end_p = (int(x0), int(y0)), (int(x1), int(y1)) + cv2.rectangle(img, start_p, end_p, color, 2) + text = "%s %.1f%%" % (classes[cl], p[0]*100) + cv2.putText(img, text, start_p, cv2.FONT_HERSHEY_SIMPLEX, 1, + (0, 0, 0), 10) + cv2.putText(img, text, start_p, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) + if stat_text is not None: + cv2.putText(img, stat_text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, + (0, 0, 0), 10) + cv2.putText(img, stat_text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, + (255, 255, 255), 3) + return img + + +# from https://deepdrive.pl/ +def save_frames(args, num_iter=45913): + if not os.path.exists(args.save): + os.makedirs(args.save) + + cap = cv2.VideoCapture(args.img) + counter = 0 + pbar = tqdm(total=num_iter+1) + num_classes = len(args.classes) + model_name = args.checkpoint.split('-')[-1].split('/')[0] + model = set_model(model_name, num_classes, args.checkpoint, args.device) + model.eval() + + model.to(args.device) + + while(cap.isOpened()): + ret, img = cap.read() + if img is None: + print("END") + break + + # scale + BGR to RGB + inference_size = (768, 768) + scaled_img = cv2.resize(img[:, :, ::-1], inference_size) + + transform = T.Compose([ + T.ToTensor(), + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + # mean-std normalize the input image (batch-size: 1) + img_tens = transform(scaled_img).unsqueeze(0).to(args.device) + + # Inference + t0 = 
time.time() + with torch.no_grad(): + # propagate through the model + output = model(img_tens) + t1 = time.time() + + # keep only predictions above set confidence + bboxes_keep = output[0, output[0, :, 4] > args.prob_threshold] + probas = bboxes_keep[:, 4:] + + # convert boxes to image scales + bboxes_scaled = rescale_bboxes(bboxes_keep[:, :4], + (img.shape[1], img.shape[0]), + inference_size) + + txt = "Detect-waste %s Threshold=%.2f " \ + "Inference %dx%d GPU: %s Inference time %.3fs" % \ + (model_name, args.prob_threshold, inference_size[0], + inference_size[1], torch.cuda.get_device_name(0), + t1 - t0) + result = get_output(img, probas, bboxes_scaled, + args.classes, txt) + cv2.imwrite(os.path.join(args.save, 'img%08d.jpg' % counter), result) + counter += 1 + pbar.update(1) + del img + del img_tens + del result + + cap.release() + + +def plot_results(pil_img, prob, boxes, classes=['Litter'], + save_path=None, colors=None): + plt.figure(figsize=(16, 10)) + plt.imshow(pil_img) + ax = plt.gca() + if colors is None: + # colors for visualization + colors = 100 * [ + [0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125], + [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]] + for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes, colors): + ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, + fill=False, color=c, linewidth=3)) + cl = int(p[1]) + text = f'{classes[cl]}: {p[0]:0.2f}' + ax.text(xmin, ymin, text, fontsize=15, + bbox=dict(facecolor='yellow', alpha=0.5)) + plt.axis('off') + if save_path is not None: + plt.savefig(save_path, bbox_inches='tight', + transparent=True, pad_inches=0) + plt.close() + print(f'Image saved at {save_path}') + else: + plt.show() + + +def set_model(model_type, num_classes, checkpoint_path, device): + + # create model + model = create_model( + model_type, + bench_task='predict', + num_classes=num_classes, + pretrained=False, + redundant_bias=True, + checkpoint_path=checkpoint_path + ) + + param_count = sum([m.numel() for m in model.parameters()]) + print('Model %s created, param count: %d' % (model_type, param_count)) + model = model.to(device) + return model + + +def main(args): + # prepare model for evaluation + torch.set_grad_enabled(False) + num_classes = len(args.classes) + model_name = args.checkpoint.split('-')[-1].split('/')[0] + model = set_model(model_name, num_classes, args.checkpoint, args.device) + + model.eval() + # get image + if args.img.startswith('https'): + im = Image.open(requests.get(args.img, stream=True).raw).convert('RGB') + else: + im = Image.open(args.img).convert('RGB') + + # mean-std normalize the input image (batch-size: 1) + img = get_transforms(im) + + # propagate through the model + outputs = model(img.to(args.device)) + + # keep only predictions above set confidence + bboxes_keep = outputs[0, outputs[0, :, 4] > args.prob_threshold] + probas = bboxes_keep[:, 4:] + + # convert boxes to image scales + bboxes_scaled = rescale_bboxes(bboxes_keep[:, :4], im.size, + tuple(img.size()[2:])) + + # plot and save demo image + plot_results(im, probas, bboxes_scaled.tolist(), args.classes, args.save) + + +if __name__ == '__main__': + parser = get_args_parser() + args = parser.parse_args() + if args.video: + save_frames(args) + else: + main(args) diff --git a/models/efficientdet-d2-detector.pth.tar b/models/efficientdet-d2-detector.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..92bc303935575092990a6c8359a19c759d8cda78 --- /dev/null +++ 
b/models/efficientdet-d2-detector.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499a3f0c75e13669d69be25854e980812e2f6b50e618ba2b2e90b25f193e7fd9 +size 97791163 diff --git a/models/resnet50-classifier.pkl b/models/resnet50-classifier.pkl new file mode 100644 index 0000000000000000000000000000000000000000..305f3c9740b729229613ce8bd71f9444d38e8071 --- /dev/null +++ b/models/resnet50-classifier.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2c0667090f996cbe4bab8585300528b8896071e70b1edfdbe671015a074e85 +size 102980821 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..acd41946d2b46521375aba485cfcfe48ab65519e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,22 @@ +albumentations>=0.5.2 +efficientnet_pytorch +fastai==2.7.13 +funcy==1.15 +iterative-stratification==0.1.6 +matplotlib==3.8.2 +numpy==1.26.2 +omegaconf>=2.0 +opencv-python==4.8.1.78 +opencv-python-headless==4.8.1.78 +pycocotools>=2.0.0 +pytorch_lightning +pyyaml +rembg==2.0.53 +scikit-learn==1.3.2 +scikit-plot +scipy==1.11.4 +streamlit +timm +torch +torchvision +tqdm==4.66.1 \ No newline at end of file diff --git a/trash_detector.py b/trash_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..1f959323b292a6289f80b2299003f4aad01906bc --- /dev/null +++ b/trash_detector.py @@ -0,0 +1,57 @@ +import numpy as np +import torch +from fastai.vision.all import load_learner + +from efficientdet.efficientdet import get_transforms, rescale_bboxes, set_model + + +def localize_trash(im, det_name, det_checkpoint, device, prob_threshold): + # detector + detector = set_model(det_name, 1, det_checkpoint, device) + detector.eval() + # mean-std normalize the input image (batch-size: 1) + img = get_transforms(im) + # propagate through the model + outputs = detector(img.to(device)) + # keep only predictions above set confidence + bboxes_keep = outputs[0, outputs[0, :, 4] > prob_threshold] + probas = bboxes_keep[:, 4:] + # convert boxes to image scales + bboxes_scaled = rescale_bboxes(bboxes_keep[:, :4], im.size, tuple(img.size()[2:])) + return probas, bboxes_scaled + + +def classify_trash(im, clas_checkpoint, cls_th, probas, bboxes_scaled): + # classifier + classifier = load_learner(clas_checkpoint) + + bboxes_final = [] + cls_prob = [] + for p, (xmin, ymin, xmax, ymax) in zip(probas, bboxes_scaled.tolist()): + img = im.crop((xmin, ymin, xmax, ymax)) + outputs = classifier.predict(img) + p[1] = torch.topk(outputs[2], k=1).indices.squeeze(0).item() + p[0] = torch.max(np.trunc(outputs[2] * 100)) + if p[0] >= cls_th * 100: + bboxes_final.append((xmin, ymin, xmax, ymax)) + cls_prob.append(p) + return cls_prob, bboxes_final + + +def detect_trash( + im, det_name, det_checkpoint, clas_checkpoint, device, prob_threshold, cls_th +): + # prepare models for evaluation + torch.set_grad_enabled(False) + + # 1) Localize + probas, bboxes_scaled = localize_trash( + im, det_name, det_checkpoint, device, prob_threshold + ) + + # 2) Classify + cls_prob, bboxes_final = classify_trash( + im, clas_checkpoint, cls_th, probas, bboxes_scaled + ) + + return cls_prob, bboxes_final