Spaces:

tidalove
/

yolox

Sleeping

App Files Files Community

Feng Wang committed on Apr 15, 2022

Commit

e23ae72

1 Parent(s): cd9bfd6

feat(layers): support jit op (#1241)

Browse files

Files changed (14) hide show

MANIFEST.in +2 -0
README.md +1 -0
setup.py +29 -34
tools/eval.py +9 -1
tools/train.py +2 -1
yolox/__init__.py +0 -4
yolox/exp/yolox_base.py +2 -7
yolox/layers/__init__.py +9 -1
yolox/layers/{csrc/cocoeval → cocoeval}/cocoeval.cpp +0 -0
yolox/layers/{csrc/cocoeval → cocoeval}/cocoeval.h +13 -0
yolox/layers/csrc/vision.cpp +0 -13
yolox/layers/fast_coco_eval_api.py +7 -6
yolox/layers/jit_ops.py +138 -0
yolox/utils/dist.py +8 -1

MANIFEST.in ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ include requirements.txt
2	+ recursive-include yolox *.cpp *.h *.cu *.cuh *.cc

README.md CHANGED Viewed

@@ -10,6 +10,7 @@ This repo is an implementation of PyTorch version YOLOX, there is also a [MegEng
 <img src="assets/git_fig.png" width="1000" >
 ## Updates!!
 * 【2021/08/19】 We optimize the training process with **2x** faster training and **~1%** higher performance! See [notes](docs/updates_note.md) for more details.
 * 【2021/08/05】 We release [MegEngine version YOLOX](https://github.com/MegEngine/YOLOX).
 * 【2021/07/28】 We fix the fatal error of [memory leak](https://github.com/Megvii-BaseDetection/YOLOX/issues/103)

 <img src="assets/git_fig.png" width="1000" >
 ## Updates!!
+* 【2022/04/14】 We support JIT compilation of ops.
 * 【2021/08/19】 We optimize the training process with **2x** faster training and **~1%** higher performance! See [notes](docs/updates_note.md) for more details.
 * 【2021/08/05】 We release [MegEngine version YOLOX](https://github.com/MegEngine/YOLOX).
 * 【2021/07/28】 We fix the fatal error of [memory leak](https://github.com/Megvii-BaseDetection/YOLOX/issues/103)

setup.py CHANGED Viewed

@@ -3,38 +3,14 @@
 import re
 import setuptools
-import glob
-from os import path
-import torch
-from torch.utils.cpp_extension import CppExtension
-def get_extensions():
-    this_dir = path.dirname(path.abspath(__file__))
-    extensions_dir = path.join(this_dir, "yolox", "layers", "csrc")
-    main_source = path.join(extensions_dir, "vision.cpp")
-    sources = glob.glob(path.join(extensions_dir, "**", "*.cpp"))
-    sources = [main_source] + sources
-    extension = CppExtension
-    extra_compile_args = {"cxx": ["-O3"]}
-    define_macros = []
-    include_dirs = [extensions_dir]
-    ext_modules = [
-        extension(
-            "yolox._C",
-            sources,
-            include_dirs=include_dirs,
-            define_macros=define_macros,
-            extra_compile_args=extra_compile_args,
-        )
-    ]
-    return ext_modules
 def get_package_dir():
@@ -67,23 +43,42 @@ def get_long_description():
     return long_description
 setuptools.setup(
     name="yolox",
     version=get_yolox_version(),
     author="megvii basedet team",
     url="https://github.com/Megvii-BaseDetection/YOLOX",
     package_dir=get_package_dir(),
     python_requires=">=3.6",
     install_requires=get_install_requirements(),
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
-    ext_modules=get_extensions(),
     classifiers=[
         "Programming Language :: Python :: 3", "Operating System :: OS Independent",
         "License :: OSI Approved :: Apache Software License",
     ],
-    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
-    packages=setuptools.find_packages(),
     project_urls={
         "Documentation": "https://yolox.readthedocs.io",
         "Source": "https://github.com/Megvii-BaseDetection/YOLOX",

 import re
 import setuptools
+import sys
+TORCH_AVAILABLE = True
+try:
+    import torch
+except ImportError:
+    TORCH_AVAILABLE = False
+    print("[WARNING] Unable to import torch, pre-compiling ops will be disabled.")
 def get_package_dir():
     return long_description
+def get_ext_modules():
+    """Return the extension modules to pre-compile when the package is installed.
+    On win32 nothing is pre-compiled and an empty list is returned.
+    On every other platform the fast COCO-eval op is appended.
+    Raises:
+        RuntimeError: if ops must be pre-compiled but torch could not be imported.
+    """
+    ext_module = []
+    if sys.platform != "win32":  # pre-compile ops on every non-Windows platform
+        # Explicit raise instead of `assert`, which is stripped under `python -O`.
+        if not TORCH_AVAILABLE:
+            raise RuntimeError(
+                "torch is required for pre-compiling ops, please install it first."
+            )
+        # if any other op is added, please also add it here
+        from yolox.layers import FastCOCOEvalOp
+        ext_module.append(FastCOCOEvalOp().build_op())
+    return ext_module
+def get_cmd_class():
+    """Return the `cmdclass` mapping passed to `setuptools.setup`.
+    When torch is importable, `build_ext` is mapped to torch's `BuildExtension`;
+    otherwise an empty dict is returned and setuptools' defaults apply.
+    """
+    cmdclass = {}
+    if TORCH_AVAILABLE:
+        # `import torch` is not guaranteed to load the `torch.utils.cpp_extension`
+        # submodule (e.g. on win32, where no op import pulls it in beforehand),
+        # so import it explicitly rather than relying on attribute access.
+        from torch.utils import cpp_extension
+        cmdclass["build_ext"] = cpp_extension.BuildExtension
+    return cmdclass
 setuptools.setup(
     name="yolox",
     version=get_yolox_version(),
     author="megvii basedet team",
     url="https://github.com/Megvii-BaseDetection/YOLOX",
     package_dir=get_package_dir(),
+    packages=setuptools.find_packages(exclude=("tests", "tools")) + list(get_package_dir().keys()),
     python_requires=">=3.6",
     install_requires=get_install_requirements(),
+    setup_requires=["wheel"],  # avoid building error when pip is not updated
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
+    include_package_data=True,  # include files in MANIFEST.in
+    ext_modules=get_ext_modules(),
+    cmdclass=get_cmd_class(),
     classifiers=[
         "Programming Language :: Python :: 3", "Operating System :: OS Independent",
         "License :: OSI Approved :: Apache Software License",
     ],
     project_urls={
         "Documentation": "https://yolox.readthedocs.io",
         "Source": "https://github.com/Megvii-BaseDetection/YOLOX",

tools/eval.py CHANGED Viewed

@@ -14,7 +14,14 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 from yolox.core import launch
 from yolox.exp import get_exp
-from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger
 def make_parser():
@@ -190,6 +197,7 @@ def main(exp, args, num_gpu):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
     exp.merge(args.opts)

 from yolox.core import launch
 from yolox.exp import get_exp
+from yolox.utils import (
+    configure_module,
+    configure_nccl,
+    fuse_model,
+    get_local_rank,
+    get_model_info,
+    setup_logger
+)
 def make_parser():
 if __name__ == "__main__":
+    configure_module()
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
     exp.merge(args.opts)

tools/train.py CHANGED Viewed

@@ -12,7 +12,7 @@ import torch.backends.cudnn as cudnn
 from yolox.core import Trainer, launch
 from yolox.exp import get_exp
-from yolox.utils import configure_nccl, configure_omp, get_num_devices
 def make_parser():
@@ -118,6 +118,7 @@ def main(exp, args):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
     exp.merge(args.opts)

 from yolox.core import Trainer, launch
 from yolox.exp import get_exp
+from yolox.utils import configure_module, configure_nccl, configure_omp, get_num_devices
 def make_parser():
 if __name__ == "__main__":
+    configure_module()
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
     exp.merge(args.opts)

yolox/__init__.py CHANGED Viewed

@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
-from .utils import configure_module
-configure_module()
 __version__ = "0.2.0"

 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 __version__ = "0.2.0"

yolox/exp/yolox_base.py CHANGED Viewed

@@ -139,14 +139,9 @@ class Exp(BaseExp):
             MosaicDetection,
             worker_init_reset_seed,
         )
-        from yolox.utils import (
-            wait_for_the_master,
-            get_local_rank,
-        )
-        local_rank = get_local_rank()
-        with wait_for_the_master(local_rank):
             dataset = COCODataset(
                 data_dir=self.data_dir,
                 json_file=self.train_ann,

             MosaicDetection,
             worker_init_reset_seed,
         )
+        from yolox.utils import wait_for_the_master
+        with wait_for_the_master():
             dataset = COCODataset(
                 data_dir=self.data_dir,
                 json_file=self.train_ann,

yolox/layers/__init__.py CHANGED Viewed

@@ -2,4 +2,12 @@
 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii Inc. All rights reserved.
-from .fast_coco_eval_api import COCOeval_opt

 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii Inc. All rights reserved.
+# import torch first to make jit op work without `ImportError of libc10.so`
+import torch  # noqa
+from .jit_ops import FastCOCOEvalOp, JitOp
+try:
+    from .fast_coco_eval_api import COCOeval_opt
+except ImportError:  #  exception will be raised when users build yolox from source
+    pass

yolox/layers/{csrc/cocoeval → cocoeval}/cocoeval.cpp RENAMED Viewed

File without changes

yolox/layers/{csrc/cocoeval → cocoeval}/cocoeval.h RENAMED Viewed

@@ -83,3 +83,16 @@ py::dict Accumulate(
     const std::vector<ImageEvaluation>& evalutations);
 } // namespace COCOeval

     const std::vector<ImageEvaluation>& evalutations);
 } // namespace COCOeval
+// Python bindings for the COCOeval helpers declared in this header.
+// Exposes two functions (COCOevalAccumulate, COCOevalEvaluateImages) and two
+// classes (InstanceAnnotation, ImageEvaluation) on the extension module.
+// NOTE(review): the module name comes from the TORCH_EXTENSION_NAME macro,
+// presumably defined by the torch extension build -- confirm.
+// NOTE(review): because this definition lives in a header, including the header
+// from more than one translation unit would define the module twice -- confirm
+// that it is only included by cocoeval.cpp.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
+    m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate");
+    m.def(
+        "COCOevalEvaluateImages",
+        &COCOeval::EvaluateImages,
+        "COCOeval::EvaluateImages");
+    // Constructor arguments: (uint64_t, double, double, bool, bool).
+    pybind11::class_<COCOeval::InstanceAnnotation>(m, "InstanceAnnotation")
+        .def(pybind11::init<uint64_t, double, double, bool, bool>());
+    // Default-constructible only; no other members are exposed to Python here.
+    pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
+        .def(pybind11::init<>());
+}

yolox/layers/csrc/vision.cpp DELETED Viewed

@@ -1,13 +0,0 @@
-#include "cocoeval/cocoeval.h"
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-    m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate");
-    m.def(
-        "COCOevalEvaluateImages",
-        &COCOeval::EvaluateImages,
-        "COCOeval::EvaluateImages");
-    pybind11::class_<COCOeval::InstanceAnnotation>(m, "InstanceAnnotation")
-        .def(pybind11::init<uint64_t, double, double, bool, bool>());
-    pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
-        .def(pybind11::init<>());
-}

yolox/layers/fast_coco_eval_api.py CHANGED Viewed

@@ -11,9 +11,7 @@ import time
 import numpy as np
 from pycocotools.cocoeval import COCOeval
-# import torch first to make yolox._C work without ImportError of libc10.so
-# in YOLOX, env is already set in __init__.py.
-from yolox import _C
 class COCOeval_opt(COCOeval):
@@ -21,6 +19,9 @@ class COCOeval_opt(COCOeval):
     This is a slightly modified version of the original COCO API, where the functions evaluateImg()
     and accumulate() are implemented in C++ to speedup evaluation
     """
     def evaluate(self):
         """
@@ -72,7 +73,7 @@ class COCOeval_opt(COCOeval):
             # to access in C++
             instances_cpp = []
             for instance in instances:
-                instance_cpp = _C.InstanceAnnotation(
                     int(instance["id"]),
                     instance["score"] if is_det else instance.get("score", 0.0),
                     instance["area"],
@@ -106,7 +107,7 @@ class COCOeval_opt(COCOeval):
             ]
         # Call C++ implementation of self.evaluateImgs()
-        self._evalImgs_cpp = _C.COCOevalEvaluateImages(
             p.areaRng,
             maxDet,
             p.iouThrs,
@@ -131,7 +132,7 @@ class COCOeval_opt(COCOeval):
         if not hasattr(self, "_evalImgs_cpp"):
             print("Please run evaluate() first")
-        self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
         # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
         self.eval["recall"] = np.array(self.eval["recall"]).reshape(

 import numpy as np
 from pycocotools.cocoeval import COCOeval
+from .jit_ops import FastCOCOEvalOp
 class COCOeval_opt(COCOeval):
     This is a slightly modified version of the original COCO API, where the functions evaluateImg()
     and accumulate() are implemented in C++ to speedup evaluation
     """
+    def __init__(self, *args, **kwargs):
+        """Initialise the base COCOeval and attach the C++ evaluation module.
+        All positional and keyword arguments are forwarded unchanged to
+        `COCOeval.__init__`.
+        """
+        super().__init__(*args, **kwargs)
+        # Module returned by FastCOCOEvalOp.load(); the evaluate/accumulate code
+        # reaches the C++ entry points through `self.module`.
+        self.module = FastCOCOEvalOp().load()
     def evaluate(self):
         """
             # to access in C++
             instances_cpp = []
             for instance in instances:
+                instance_cpp = self.module.InstanceAnnotation(
                     int(instance["id"]),
                     instance["score"] if is_det else instance.get("score", 0.0),
                     instance["area"],
             ]
         # Call C++ implementation of self.evaluateImgs()
+        self._evalImgs_cpp = self.module.COCOevalEvaluateImages(
             p.areaRng,
             maxDet,
             p.iouThrs,
         if not hasattr(self, "_evalImgs_cpp"):
             print("Please run evaluate() first")
+        self.eval = self.module.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
         # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
         self.eval["recall"] = np.array(self.eval["recall"]).reshape(

yolox/layers/jit_ops.py ADDED Viewed

	@@ -0,0 +1,138 @@

+#!/usr/bin/env python3
+# Copyright (c) Megvii, Inc. and its affiliates. All Rights Reserved
+import glob
+import importlib
+import os
+import sys
+import time
+from typing import List
+__all__ = ["JitOp", "FastCOCOEvalOp"]
+class JitOp:
+    """
+    Just-in-time compilation of ops.
+    A subclass describes one native op (name, sources, compiler flags). This base
+    class turns that description into either a setuptools extension (`build_op`)
+    or a module that is imported, or compiled on demand, at runtime (`load`).
+    Some code of `JitOp` is inspired by `deepspeed.op_builder`,
+    check the following link for more details:
+    https://github.com/microsoft/DeepSpeed/blob/master/op_builder/builder.py
+    """
+    def __init__(self, name):
+        # Used as the module name for runtime (JIT) builds and in log messages.
+        self.name = name
+    def absolute_name(self) -> str:
+        """Get absolute build path for cases where the op is pre-installed.
+        Raises:
+            NotImplementedError: always; subclasses must override this hook.
+        """
+        # Fail loudly instead of returning None, which would otherwise surface
+        # later as an obscure error inside importlib / CppExtension.
+        raise NotImplementedError(
+            f"{type(self).__name__} must implement absolute_name()"
+        )
+    def sources(self) -> List:
+        """Get path list of source files of op.
+        NOTE: the path should be relative to root of package during building,
+            Otherwise, exception will be raised when building package.
+            However, for runtime building, path will be absolute.
+        Raises:
+            NotImplementedError: always; subclasses must override this hook.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} must implement sources()"
+        )
+    def include_dirs(self) -> List:
+        """
+        Get list of include paths, relative to root of package.
+        NOTE: the path should be relative to root of package.
+            Otherwise, exception will be raised when building package.
+        """
+        return []
+    def define_macros(self) -> List:
+        """Get list of macros to define for op"""
+        return []
+    def cxx_args(self) -> List:
+        """Get optional list of compiler flags to forward"""
+        args = ["-O2"] if sys.platform == "win32" else ["-O3", "-std=c++14", "-g", "-Wno-reorder"]
+        return args
+    def nvcc_args(self) -> List:
+        """Get optional list of compiler flags to forward to nvcc when building CUDA sources"""
+        args = [
+            "-O3", "--use_fast_math",
+            "-std=c++17" if sys.platform == "win32" else "-std=c++14",
+            "-U__CUDA_NO_HALF_OPERATORS__",
+            "-U__CUDA_NO_HALF_CONVERSIONS__",
+            "-U__CUDA_NO_HALF2_OPERATORS__",
+        ]
+        return args
+    def build_op(self):
+        """Describe the op as a `CppExtension` for install-time compilation.
+        Only the C++ flags from `cxx_args()` are forwarded; `nvcc_args()` is not
+        used on this path.
+        """
+        from torch.utils.cpp_extension import CppExtension
+        return CppExtension(
+            name=self.absolute_name(),
+            sources=self.sources(),
+            include_dirs=self.include_dirs(),
+            define_macros=self.define_macros(),
+            extra_compile_args={
+                "cxx": self.cxx_args(),
+            },
+        )
+    def load(self, verbose=True):
+        """Return the op module, importing a pre-built one or JIT-compiling it.
+        Args:
+            verbose: forwarded to `jit_load` when the pre-built import fails.
+        """
+        try:
+            # try to import op from pre-installed package
+            return importlib.import_module(self.absolute_name())
+        except Exception:  # op not compiled, jit load
+            # Deliberately broad: any failure to import the pre-built module
+            # falls back to compiling it at runtime.
+            from yolox.utils import wait_for_the_master
+            with wait_for_the_master():  # to avoid race condition
+                return self.jit_load(verbose)
+    def jit_load(self, verbose=True):
+        """Compile the op with `torch.utils.cpp_extension.load` and return the module.
+        Args:
+            verbose: when true, log a warning if ninja is missing, log the build
+                duration, and pass `verbose=True` to the torch loader.
+        """
+        from torch.utils.cpp_extension import load
+        from loguru import logger
+        try:
+            import ninja  # noqa
+        except ImportError:
+            if verbose:
+                logger.warning(
+                    f"Ninja is not installed, fall back to normal installation for {self.name}."
+                )
+        build_tik = time.time()
+        # build op and load
+        # NOTE(review): include_dirs() and define_macros() are not forwarded on
+        # this path -- confirm that is intended for ops that need them.
+        op_module = load(
+            name=self.name,
+            sources=self.sources(),
+            extra_cflags=self.cxx_args(),
+            extra_cuda_cflags=self.nvcc_args(),
+            verbose=verbose,
+        )
+        build_duration = time.time() - build_tik
+        if verbose:
+            logger.info(f"Load {self.name} op in {build_duration:.3f}s.")
+        return op_module
+    def clear_dynamic_library(self):
+        """Remove dynamic library files generated by JIT compilation."""
+        # load() resolves the module first, so the path of the file backing it
+        # can be read from `module.__file__`.
+        module = self.load()
+        os.remove(module.__file__)
+class FastCOCOEvalOp(JitOp):
+    """Op description for the C++ COCO evaluation sources under yolox/layers/cocoeval."""
+    def __init__(self, name="fast_cocoeval"):
+        super().__init__(name)
+    def absolute_name(self):
+        """Dotted module path of the op when it is installed with the package."""
+        return f"yolox.layers.{self.name}"
+    def sources(self):
+        """Collect the op's `*.cpp` files, preferring paths relative to the cwd."""
+        relative_hits = glob.glob(os.path.join("yolox", "layers", "cocoeval", "*.cpp"))
+        if relative_hits:
+            return relative_hits
+        # Nothing found relative to the current directory (e.g. the so file was
+        # removed after install): fall back to absolute paths inside the
+        # installed package so the op can still be compiled.
+        import yolox
+        package_root = yolox.__path__[0]
+        return glob.glob(os.path.join(package_root, "layers", "cocoeval", "*.cpp"))
+    def include_dirs(self):
+        """Include path of the op, relative to the root of the package."""
+        header_dir = os.path.join("yolox", "layers", "cocoeval")
+        return [header_dir]

yolox/utils/dist.py CHANGED Viewed

@@ -49,10 +49,17 @@ def get_num_devices():
 @contextmanager
-def wait_for_the_master(local_rank: int):
     """
     Make all processes waiting for the master to do some task.
     """
     if local_rank > 0:
         dist.barrier()
     yield

 @contextmanager
+def wait_for_the_master(local_rank: int = None):
     """
     Make all processes waiting for the master to do some task.
+    Args:
+        local_rank (int): the rank of the current process. Default to None.
+            If None, it will use the rank of the current process.
     """
+    if local_rank is None:
+        local_rank = get_local_rank()
     if local_rank > 0:
         dist.barrier()
     yield