Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

Feng Wang committed on Jul 18, 2021

Commit

39bbed9

1 Parent(s): ad34180

feat(YOLOX): add some basic experiments

Browse files

Files changed (21) hide show

exps/nano.py +39 -0
exps/yolov3.py +89 -0
exps/yolox_l.py +15 -0
exps/yolox_m.py +15 -0
exps/yolox_s.py +15 -0
exps/yolox_tiny.py +19 -0
exps/yolox_x.py +15 -0
tools/demo.py +4 -0
tools/eval.py +4 -0
tools/export_onnx.py +1 -0
tools/train.py +4 -0
tools/trt.py +10 -0
yolox/core/trainer.py +1 -1
yolox/data/data_augment.py +0 -91
yolox/data/datasets/mosaicdetection.py +23 -17
yolox/exp/build.py +2 -1
yolox/exp/yolox_base.py +7 -8
yolox/models/darknet.py +24 -11
yolox/models/network_blocks.py +13 -10
yolox/models/yolo_pafpn.py +14 -6
yolox/utils/visualize.py +2 -0

exps/nano.py ADDED Viewed

	@@ -0,0 +1,39 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+import torch.nn as nn
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.25
+        self.scale = (0.5, 1.5)
+        self.random_size = (10, 20)
+        self.test_size = (416, 416)
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+        self.enable_mixup = False
+    def get_model(self, sublinear=False):
+        def init_yolo(M):
+            for m in M.modules():
+                if isinstance(m, nn.BatchNorm2d):
+                    m.eps = 1e-3
+                    m.momentum = 0.03
+        if "model" not in self.__dict__:
+            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
+            in_channels = [256, 512, 1024]
+            # The NANO model uses depthwise=True, which is the main difference from the other models.
+            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)
+            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)
+            self.model = YOLOX(backbone, head)
+        self.model.apply(init_yolo)
+        self.model.head.initialize_biases(1e-2)
+        return self.model

exps/yolov3.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+import torch
+import torch.nn as nn
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.0
+        self.width = 1.0
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+    def get_model(self, sublinear=False):
+        def init_yolo(M):
+            for m in M.modules():
+                if isinstance(m, nn.BatchNorm2d):
+                    m.eps = 1e-3
+                    m.momentum = 0.03
+        if "model" not in self.__dict__:
+            from yolox.models import YOLOX, YOLOFPN, YOLOXHead
+            backbone = YOLOFPN()
+            head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu")
+            self.model = YOLOX(backbone, head)
+        self.model.apply(init_yolo)
+        self.model.head.initialize_biases(1e-2)
+        return self.model
+    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
+        from data.datasets.cocodataset import COCODataset
+        from data.datasets.mosaicdetection import MosaicDetection
+        from data.datasets.data_augment import TrainTransform
+        from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler
+        import torch.distributed as dist
+        dataset = COCODataset(
+                data_dir='data/COCO/',
+                json_file=self.train_ann,
+                img_size=self.input_size,
+                preproc=TrainTransform(
+                    rgb_means=(0.485, 0.456, 0.406),
+                    std=(0.229, 0.224, 0.225),
+                    max_labels=50
+                ),
+        )
+        dataset = MosaicDetection(
+            dataset,
+            mosaic=not no_aug,
+            img_size=self.input_size,
+            preproc=TrainTransform(
+                rgb_means=(0.485, 0.456, 0.406),
+                std=(0.229, 0.224, 0.225),
+                max_labels=120
+            ),
+            degrees=self.degrees,
+            translate=self.translate,
+            scale=self.scale,
+            shear=self.shear,
+            perspective=self.perspective,
+        )
+        self.dataset = dataset
+        if is_distributed:
+            batch_size = batch_size // dist.get_world_size()
+            sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
+        else:
+            sampler = torch.utils.data.RandomSampler(self.dataset)
+        batch_sampler = YoloBatchSampler(
+            sampler=sampler,
+            batch_size=batch_size,
+            drop_last=False,
+            input_dimension=self.input_size,
+            mosaic=not no_aug
+        )
+        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
+        dataloader_kwargs["batch_sampler"] = batch_sampler
+        train_loader = DataLoader(self.dataset, **dataloader_kwargs)
+        return train_loader

exps/yolox_l.py ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.0
+        self.width = 1.0
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

exps/yolox_m.py ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.67
+        self.width = 0.75
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

exps/yolox_s.py ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.50
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

exps/yolox_tiny.py ADDED Viewed

	@@ -0,0 +1,19 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 0.33
+        self.width = 0.375
+        self.scale = (0.5, 1.5)
+        self.random_size = (10, 20)
+        self.test_size = (416, 416)
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
+        self.enable_mixup = False

exps/yolox_x.py ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+import os
+from yolox.exp import Exp as MyExp
+class Exp(MyExp):
+    def __init__(self):
+        super(Exp, self).__init__()
+        self.depth = 1.33
+        self.width = 1.25
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

tools/demo.py CHANGED Viewed

@@ -1,3 +1,7 @@
 import argparse
 import os
 import time

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 import argparse
 import os
 import time

tools/eval.py CHANGED Viewed

@@ -1,3 +1,7 @@
 import argparse
 import os
 import random

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 import argparse
 import os
 import random

tools/export_onnx.py CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 import argparse
 import os

 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 import argparse
 import os

tools/train.py CHANGED Viewed

@@ -1,3 +1,7 @@
 import argparse
 import random
 import warnings

+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 import argparse
 import random
 import warnings

tools/trt.py CHANGED Viewed

@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 import argparse
 import os
 from loguru import logger
 import tensorrt as trt
@@ -61,6 +63,14 @@ def main():
     )
     torch.save(model_trt.state_dict(), os.path.join(file_name, 'model_trt.pth'))
     logger.info("Converted TensorRT model done.")
 if __name__ == "__main__":

 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
 import argparse
 import os
+import shutil
 from loguru import logger
 import tensorrt as trt
     )
     torch.save(model_trt.state_dict(), os.path.join(file_name, 'model_trt.pth'))
     logger.info("Converted TensorRT model done.")
+    engine_file = os.path.join(file_name, 'model_trt.engine')
+    engine_file_demo = os.path.join('yolox', 'deploy', 'demo_trt_c++', 'model_trt.engine')
+    with open(engine_file, 'wb') as f:
+        f.write(model_trt.engine.serialize())
+    shutil.copyfile(engine_file, engine_file_demo)
+    logger.info("Converted TensorRT model engine file is saved for C++ inference.")
 if __name__ == "__main__":

yolox/core/trainer.py CHANGED Viewed

@@ -283,7 +283,7 @@ class Trainer:
                 logger.info("loading checkpoint for fine tuning")
                 ckpt_file = self.args.ckpt
                 ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
-                model = load_ckpt(self.model, ckpt)
             self.start_epoch = 0
         return model

                 logger.info("loading checkpoint for fine tuning")
                 ckpt_file = self.args.ckpt
                 ckpt = torch.load(ckpt_file, map_location=self.device)["model"]
+                model = load_ckpt(model, ckpt)
             self.start_epoch = 0
         return model

yolox/data/data_augment.py CHANGED Viewed

@@ -162,97 +162,6 @@ def _mirror(image, boxes):
     return image, boxes
-# TODO: reorg: use mosaicDet instead
-def _random_affine(
-    img,
-    targets=None,
-    degrees=(-10, 10),
-    translate=(0.1, 0.1),
-    scale=(0.9, 1.1),
-    shear=(-2, 2),
-    borderValue=(114, 114, 114),
-):
-    # degrees = (0, 0)
-    # shear = (0, 0)
-    border = 0  # width of added border (optional)
-    # height = max(img.shape[0], img.shape[1]) + border * 2
-    height, width, _ = img.shape
-    # Rotation and Scale
-    R = np.eye(3)
-    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
-    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
-    s = random.random() * (scale[1] - scale[0]) + scale[0]
-    R[:2] = cv2.getRotationMatrix2D(
-        angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s
-    )
-    # Translation
-    T = np.eye(3)
-    # x translation (pixels)
-    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border
-    # y translation (pixels)
-    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border
-    # Shear
-    S = np.eye(3)
-    # x shear (deg)
-    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
-    # y shear (deg)
-    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
-    # Combined rotation matrix. NOTE: ORDER IS IMPORTANT HERE!!
-    M = S @ T @ R
-    # BGR order borderValue
-    imw = cv2.warpPerspective(
-        img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=borderValue
-    )
-    # Return warped points also
-    if targets is not None:
-        if len(targets) > 0:
-            n = targets.shape[0]
-            points = targets[:, 0:4].copy()
-            # warp points
-            xy = np.ones((n * 4, 3))
-            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
-                n * 4, 2
-            )  # x1y1, x2y2, x1y2, x2y1
-            xy = (xy @ M.T)[:, :2].reshape(n, 8)
-            # create new boxes
-            x = xy[:, [0, 2, 4, 6]]
-            y = xy[:, [1, 3, 5, 7]]
-            xy = (
-                np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-            )
-            # apply angle-based reduction
-            radians = a * math.pi / 180
-            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
-            x = (xy[:, 2] + xy[:, 0]) / 2
-            y = (xy[:, 3] + xy[:, 1]) / 2
-            w = (xy[:, 2] - xy[:, 0]) * reduction
-            h = (xy[:, 3] - xy[:, 1]) * reduction
-            xy = (
-                np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2))
-                .reshape(4, n)
-                .T
-            )
-            # reject warped points outside of image
-            x1 = np.clip(xy[:, 0], 0, width)
-            y1 = np.clip(xy[:, 1], 0, height)
-            x2 = np.clip(xy[:, 2], 0, width)
-            y2 = np.clip(xy[:, 3], 0, height)
-            boxes = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
-        return imw, boxes, M
-    else:
-        return imw
 def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
     if len(image.shape) == 3:
         padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0

     return image, boxes
 def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
     if len(image.shape) == 3:
         padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0

yolox/data/datasets/mosaicdetection.py CHANGED Viewed

@@ -48,10 +48,10 @@ class MosaicDetection(Dataset):
     def __getitem__(self, idx):
         if self._mosaic:
             labels4 = []
-            s = self._dataset.input_dim[0]
             # yc, xc = s, s  # mosaic center x, y
-            yc = int(random.uniform(0.5 * s, 1.5 * s))
-            xc = int(random.uniform(0.5 * s, 1.5 * s))
             # 3 additional image indices
             indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
@@ -59,26 +59,28 @@ class MosaicDetection(Dataset):
             for i, index in enumerate(indices):
                 img, _labels, _, _ = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
-                r = 1.0 * s / max(h0, w0)  # resize image to img_size
                 interp = cv2.INTER_LINEAR
-                img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
                 (h, w) = img.shape[:2]
                 if i == 0:  # top left
                     # base image with 4 tiles
-                    img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)
                     # xmin, ymin, xmax, ymax (large image)
                     x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
                     # xmin, ymin, xmax, ymax (small image)
                     x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
                 elif i == 1:  # top right
-                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                     x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
                 elif i == 2:  # bottom left
-                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                     x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
                 elif i == 3:  # bottom right
-                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                     x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
                 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
@@ -87,15 +89,20 @@ class MosaicDetection(Dataset):
                 labels = _labels.copy()  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
                 if _labels.size > 0:  # Normalized xywh to pixel xyxy format
-                    labels[:, 0] = r * _labels[:, 0] + padw
-                    labels[:, 1] = r * _labels[:, 1] + padh
-                    labels[:, 2] = r * _labels[:, 2] + padw
-                    labels[:, 3] = r * _labels[:, 3] + padh
                 labels4.append(labels)
             if len(labels4):
                 labels4 = np.concatenate(labels4, 0)
-                np.clip(labels4[:, :4], 0, 2 * s, out=labels4[:, :4])  # use with random_affine
             img4, labels4 = random_perspective(
                 img4,
                 labels4,
@@ -104,7 +111,7 @@ class MosaicDetection(Dataset):
                 scale=self.scale,
                 shear=self.shear,
                 perspective=self.perspective,
-                border=[-s // 2, -s // 2],
             )  # border to remove
             # -----------------------------------------------------------------
@@ -124,7 +131,6 @@ class MosaicDetection(Dataset):
             return img, label, img_info, int(idx)
     def mixup(self, origin_img, origin_labels, input_dim):
-        # jit_factor = random.uniform(0.8, 1.2)
         jit_factor = random.uniform(*self.mixup_scale)
         FLIP = random.uniform(0, 1) > 0.5
         cp_labels = []
@@ -139,7 +145,7 @@ class MosaicDetection(Dataset):
             cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0
         else:
             cp_img = np.ones(input_dim) * 114.0
-        cp_scale_ratio = input_dim[0] / max(img.shape[0], img.shape[1])
         resized_img = cv2.resize(
             img,
             (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),

     def __getitem__(self, idx):
         if self._mosaic:
             labels4 = []
+            input_dim = self._dataset.input_dim
             # yc, xc = s, s  # mosaic center x, y
+            yc = int(random.uniform(0.5 * input_dim[0], 1.5 * input_dim[0]))
+            xc = int(random.uniform(0.5 * input_dim[1], 1.5 * input_dim[1]))
             # 3 additional image indices
             indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]
             for i, index in enumerate(indices):
                 img, _labels, _, _ = self._dataset.pull_item(index)
                 h0, w0 = img.shape[:2]  # orig hw
+                scale = min(1. * input_dim[0] / h0, 1. * input_dim[1] / w0)
                 interp = cv2.INTER_LINEAR
+                img = cv2.resize(img, (int(w0 * scale), int(h0 * scale)), interpolation=interp)
                 (h, w) = img.shape[:2]
                 if i == 0:  # top left
                     # base image with 4 tiles
+                    img4 = np.full(
+                        (input_dim[0] * 2, input_dim[1] * 2, img.shape[2]), 114, dtype=np.uint8
+                    )
                     # xmin, ymin, xmax, ymax (large image)
                     x1a, y1a, x2a, y2a = (max(xc - w, 0), max(yc - h, 0), xc, yc,)
                     # xmin, ymin, xmax, ymax (small image)
                     x1b, y1b, x2b, y2b = (w - (x2a - x1a), h - (y2a - y1a), w, h,)
                 elif i == 1:  # top right
+                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, input_dim[1] * 2), yc
                     x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
                 elif i == 2:  # bottom left
+                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(input_dim[0] * 2, yc + h)
                     x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
                 elif i == 3:  # bottom right
+                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, input_dim[1] * 2), min(input_dim[0] * 2, yc + h)  # noqa
                     x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
                 img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
                 labels = _labels.copy()  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
                 if _labels.size > 0:  # Normalized xywh to pixel xyxy format
+                    labels[:, 0] = scale * _labels[:, 0] + padw
+                    labels[:, 1] = scale * _labels[:, 1] + padh
+                    labels[:, 2] = scale * _labels[:, 2] + padw
+                    labels[:, 3] = scale * _labels[:, 3] + padh
                 labels4.append(labels)
             if len(labels4):
                 labels4 = np.concatenate(labels4, 0)
+                np.clip(labels4[:, 0], 0, 2 * input_dim[1], out=labels4[:, 0])
+                np.clip(labels4[:, 1], 0, 2 * input_dim[0], out=labels4[:, 1])
+                np.clip(labels4[:, 2], 0, 2 * input_dim[1], out=labels4[:, 2])
+                np.clip(labels4[:, 3], 0, 2 * input_dim[0], out=labels4[:, 3])
             img4, labels4 = random_perspective(
                 img4,
                 labels4,
                 scale=self.scale,
                 shear=self.shear,
                 perspective=self.perspective,
+                border=[-input_dim[0] // 2, -input_dim[1] // 2],
             )  # border to remove
             # -----------------------------------------------------------------
             return img, label, img_info, int(idx)
     def mixup(self, origin_img, origin_labels, input_dim):
         jit_factor = random.uniform(*self.mixup_scale)
         FLIP = random.uniform(0, 1) > 0.5
         cp_labels = []
             cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0
         else:
             cp_img = np.ones(input_dim) * 114.0
+        cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])
         resized_img = cv2.resize(
             img,
             (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),

yolox/exp/build.py CHANGED Viewed

@@ -25,11 +25,12 @@ def get_exp_by_name(exp_name):
         "yolox-m": "yolox_l.py",
         "yolox-l": "yolox_l.py",
         "yolox-x": "yolox_x.py",
         "yolox-nano": "nano.py",
         "yolov3": "yolov3.py",
     }
     filename = filedict[exp_name]
-    exp_path = os.path.join(yolox_path, "exps", "base", filename)
     return get_exp_by_file(exp_path)

         "yolox-m": "yolox_l.py",
         "yolox-l": "yolox_l.py",
         "yolox-x": "yolox_x.py",
+        "yolox-tiny": "yolox_tiny.py",
         "yolox-nano": "nano.py",
         "yolov3": "yolov3.py",
     }
     filename = filedict[exp_name]
+    exp_path = os.path.join(yolox_path, "exps", filename)
     return get_exp_by_file(exp_path)

yolox/exp/yolox_base.py CHANGED Viewed

@@ -141,22 +141,21 @@ class Exp(BaseExp):
         return train_loader
     def random_resize(self, data_loader, epoch, rank, is_distributed):
-        tensor = torch.LongTensor(1).cuda()
         if rank == 0:
-            if epoch >= self.max_epoch - 1:
-                size = self.input_size[0]
-            else:
-                size = random.randint(*self.random_size)
-                size = int(32 * size)
-            tensor.fill_(size)
         if is_distributed:
             dist.barrier()
             dist.broadcast(tensor, 0)
         input_size = data_loader.change_input_dim(
-            multiple=tensor.item(), random_range=None
         )
         return input_size

         return train_loader
     def random_resize(self, data_loader, epoch, rank, is_distributed):
+        tensor = torch.LongTensor(2).cuda()
         if rank == 0:
+            size_factor = self.input_size[1] * 1. / self.input_size[0]
+            size = random.randint(*self.random_size)
+            size = (int(32 * size), 32 * int(size * size_factor))
+            tensor[0] = size[0]
+            tensor[1] = size[1]
         if is_distributed:
             dist.barrier()
             dist.broadcast(tensor, 0)
         input_size = data_loader.change_input_dim(
+            multiple=(tensor[0].item(), tensor[1].item()), random_range=None
         )
         return input_size

yolox/models/darknet.py CHANGED Viewed

@@ -86,7 +86,11 @@ class Darknet(nn.Module):
 class CSPDarknet(nn.Module):
-    def __init__(self, dep_mul, wid_mul, out_features=("dark3", "dark4", "dark5"), depthwise=False):
         super().__init__()
         assert out_features, "please provide output features of Darknet"
         self.out_features = out_features
@@ -96,33 +100,42 @@ class CSPDarknet(nn.Module):
         base_depth = max(round(dep_mul * 3), 1)  # 3
         # stem
-        self.stem = Focus(3, base_channels, ksize=3)
         # dark2
         self.dark2 = nn.Sequential(
-            Conv(base_channels, base_channels * 2, 3, 2),
-            CSPLayer(base_channels * 2, base_channels * 2, n=base_depth, depthwise=depthwise),
         )
         # dark3
         self.dark3 = nn.Sequential(
-            Conv(base_channels * 2, base_channels * 4, 3, 2),
-            CSPLayer(base_channels * 4, base_channels * 4, n=base_depth * 3, depthwise=depthwise),
         )
         # dark4
         self.dark4 = nn.Sequential(
-            Conv(base_channels * 4, base_channels * 8, 3, 2),
-            CSPLayer(base_channels * 8, base_channels * 8, n=base_depth * 3, depthwise=depthwise),
         )
         # dark5
         self.dark5 = nn.Sequential(
-            Conv(base_channels * 8, base_channels * 16, 3, 2),
-            SPPBottleneck(base_channels * 16, base_channels * 16),
             CSPLayer(
                 base_channels * 16, base_channels * 16, n=base_depth,
-                shortcut=False, depthwise=depthwise,
             ),
         )

 class CSPDarknet(nn.Module):
+    def __init__(
+        self, dep_mul, wid_mul,
+        out_features=("dark3", "dark4", "dark5"),
+        depthwise=False, act="silu",
+    ):
         super().__init__()
         assert out_features, "please provide output features of Darknet"
         self.out_features = out_features
         base_depth = max(round(dep_mul * 3), 1)  # 3
         # stem
+        self.stem = Focus(3, base_channels, ksize=3, act=act)
         # dark2
         self.dark2 = nn.Sequential(
+            Conv(base_channels, base_channels * 2, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 2, base_channels * 2,
+                n=base_depth, depthwise=depthwise, act=act
+            ),
         )
         # dark3
         self.dark3 = nn.Sequential(
+            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 4, base_channels * 4,
+                n=base_depth * 3, depthwise=depthwise, act=act,
+            ),
         )
         # dark4
         self.dark4 = nn.Sequential(
+            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
+            CSPLayer(
+                base_channels * 8, base_channels * 8,
+                n=base_depth * 3, depthwise=depthwise, act=act,
+            ),
         )
         # dark5
         self.dark5 = nn.Sequential(
+            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
+            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
             CSPLayer(
                 base_channels * 16, base_channels * 16, n=base_depth,
+                shortcut=False, depthwise=depthwise, act=act,
             ),
         )

yolox/models/network_blocks.py CHANGED Viewed

@@ -72,12 +72,15 @@ class DWConv(nn.Module):
 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, in_channels, out_channels, shortcut=True, expansion=0.5, depthwise=False):
         super().__init__()
         hidden_channels = int(out_channels * expansion)
         Conv = DWConv if depthwise else BaseConv
-        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1)
         self.use_add = shortcut and in_channels == out_channels
     def forward(self, x):
@@ -124,7 +127,7 @@ class CSPLayer(nn.Module):
     def __init__(
         self, in_channels, out_channels, n=1,
-        shortcut=True, expansion=0.5, depthwise=False
     ):
         """
         Args:
@@ -135,11 +138,11 @@ class CSPLayer(nn.Module):
         # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         hidden_channels = int(out_channels * expansion)  # hidden channels
-        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1)
-        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1)  # act=FReLU(c2)
         module_list = [
-            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise)
             for _ in range(n)
         ]
         self.m = nn.Sequential(*module_list)
@@ -155,9 +158,9 @@ class CSPLayer(nn.Module):
 class Focus(nn.Module):
     """Focus width and height information into channel space."""
-    def __init__(self, in_channels, out_channels, ksize=1, stride=1):
         super().__init__()
-        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride)
     def forward(self, x):
         # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)

 class Bottleneck(nn.Module):
     # Standard bottleneck
+    def __init__(
+        self, in_channels, out_channels, shortcut=True,
+        expansion=0.5, depthwise=False, act="silu"
+    ):
         super().__init__()
         hidden_channels = int(out_channels * expansion)
         Conv = DWConv if depthwise else BaseConv
+        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)
         self.use_add = shortcut and in_channels == out_channels
     def forward(self, x):
     def __init__(
         self, in_channels, out_channels, n=1,
+        shortcut=True, expansion=0.5, depthwise=False, act="silu"
     ):
         """
         Args:
         # ch_in, ch_out, number, shortcut, groups, expansion
         super().__init__()
         hidden_channels = int(out_channels * expansion)  # hidden channels
+        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
+        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)
         module_list = [
+            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act)
             for _ in range(n)
         ]
         self.m = nn.Sequential(*module_list)
 class Focus(nn.Module):
     """Focus width and height information into channel space."""
+    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"):
         super().__init__()
+        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)
     def forward(self, x):
         # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)

yolox/models/yolo_pafpn.py CHANGED Viewed

@@ -16,17 +16,17 @@ class YOLOPAFPN(nn.Module):
     def __init__(
         self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
-        in_channels=[256, 512, 1024], depthwise=False,
     ):
         super().__init__()
-        self.backbone = CSPDarknet(depth, width, depthwise=depthwise)
         self.in_features = in_features
         self.in_channels = in_channels
         Conv = DWConv if depthwise else BaseConv
         self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
         self.lateral_conv0 = BaseConv(
-            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1
         )
         self.C3_p4 = CSPLayer(
             int(2 * in_channels[1] * width),
@@ -34,10 +34,11 @@ class YOLOPAFPN(nn.Module):
             round(3 * depth),
             False,
             depthwise=depthwise,
         )  # cat
         self.reduce_conv1 = BaseConv(
-            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1
         )
         self.C3_p3 = CSPLayer(
             int(2 * in_channels[0] * width),
@@ -45,26 +46,33 @@ class YOLOPAFPN(nn.Module):
             round(3 * depth),
             False,
             depthwise=depthwise,
         )
         # bottom-up conv
-        self.bu_conv2 = Conv(int(in_channels[0] * width), int(in_channels[0] * width), 3, 2)
         self.C3_n3 = CSPLayer(
             int(2 * in_channels[0] * width),
             int(in_channels[1] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
         )
         # bottom-up conv
-        self.bu_conv1 = Conv(int(in_channels[1] * width), int(in_channels[1] * width), 3, 2)
         self.C3_n4 = CSPLayer(
             int(2 * in_channels[1] * width),
             int(in_channels[2] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
         )
     def forward(self, input):

     def __init__(
         self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
+        in_channels=[256, 512, 1024], depthwise=False, act="silu",
     ):
         super().__init__()
+        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
         self.in_features = in_features
         self.in_channels = in_channels
         Conv = DWConv if depthwise else BaseConv
         self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
         self.lateral_conv0 = BaseConv(
+            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
         )
         self.C3_p4 = CSPLayer(
             int(2 * in_channels[1] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )  # cat
         self.reduce_conv1 = BaseConv(
+            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
         )
         self.C3_p3 = CSPLayer(
             int(2 * in_channels[0] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
         # bottom-up conv
+        self.bu_conv2 = Conv(
+            int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
+        )
         self.C3_n3 = CSPLayer(
             int(2 * in_channels[0] * width),
             int(in_channels[1] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
         # bottom-up conv
+        self.bu_conv1 = Conv(
+            int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
+        )
         self.C3_n4 = CSPLayer(
             int(2 * in_channels[1] * width),
             int(in_channels[2] * width),
             round(3 * depth),
             False,
             depthwise=depthwise,
+            act=act,
         )
     def forward(self, input):

yolox/utils/visualize.py CHANGED Viewed

@@ -5,6 +5,8 @@
 import cv2
 import numpy as np
 def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):

 import cv2
 import numpy as np
+__all__ = ["vis"]
 def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):