feat(yolox): support torch amp and img caching, update preprocess logic (#523)
- README.md +33 -12
- demo/MegEngine/cpp/yolox.cpp +1 -4
- demo/MegEngine/python/demo.py +1 -3
- demo/ONNXRuntime/README.md +7 -7
- demo/ONNXRuntime/onnx_inference.py +1 -3
- demo/OpenVINO/cpp/README.md +10 -8
- demo/OpenVINO/cpp/yolox_openvino.cpp +1 -5
- demo/OpenVINO/python/README.md +7 -7
- demo/OpenVINO/python/openvino_inference.py +1 -3
- demo/TensorRT/cpp/yolox.cpp +1 -5
- demo/ncnn/cpp/yolox.cpp +1 -8
- docs/model_zoo.md +31 -7
- docs/quick_run.md +4 -11
- docs/train_custom_data.md +3 -2
- docs/updates_note.md +55 -0
- exps/default/nano.py +2 -1
- exps/example/yolox_voc/yolox_voc_s.py +26 -24
- tools/demo.py +17 -7
- tools/eval.py +8 -1
- tools/train.py +10 -3
- yolox/core/launch.py +30 -6
- yolox/core/trainer.py +11 -21
- yolox/data/__init__.py +1 -1
- yolox/data/data_augment.py +20 -57
- yolox/data/data_prefetcher.py +0 -26
- yolox/data/dataloading.py +10 -75
- yolox/data/datasets/coco.py +93 -16
- yolox/data/datasets/datasets_wrapper.py +5 -19
- yolox/data/datasets/mosaicdetection.py +23 -11
- yolox/data/datasets/voc.py +113 -17
- yolox/data/samplers.py +4 -14
- yolox/exp/yolox_base.py +45 -31
- yolox/models/yolo_head.py +8 -7
- yolox/utils/dist.py +32 -2
README.md
CHANGED
@@ -10,6 +10,7 @@ This repo is an implementation of PyTorch version YOLOX, there is also a [MegEng
 <img src="assets/git_fig.png" width="1000" >

 ## Updates!!
+* 【2021/08/19】 We optimize the training process with **2x** faster training and **~1%** higher performance! See [notes](docs/updates_note.md) for more details.
 * 【2021/08/05】 We release [MegEngine version YOLOX](https://github.com/MegEngine/YOLOX).
 * 【2021/07/28】 We fix the fatal error of [memory leak](https://github.com/Megvii-BaseDetection/YOLOX/issues/103)
 * 【2021/07/26】 We now support [MegEngine](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/MegEngine) deployment.
@@ -24,6 +25,18 @@ This repo is an implementation of PyTorch version YOLOX, there is also a [MegEng
 ## Benchmark

 #### Standard Models.
+
+|Model |size |mAP<sup>val<br>0.5:0.95 |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |
+| ------ |:---: | :---: | :---: |:---: |:---: | :---: | :----: |
+|[YOLOX-s](./exps/default/yolox_s.py) |640 |40.5 |40.5 |9.8 |9.0 | 26.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth) |
+|[YOLOX-m](./exps/default/yolox_m.py) |640 |46.9 |47.2 |12.3 |25.3 |73.8| [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth) |
+|[YOLOX-l](./exps/default/yolox_l.py) |640 |47.7 |50.1 |14.5 |54.2| 155.6 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.pth) |
+|[YOLOX-x](./exps/default/yolox_x.py) |640 |51.1 |**51.5** | 17.3 |99.1 |281.9 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth) |
+|[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.7 | 48.0 | 11.1 |63.7 | 185.3 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.pth) |
+
+<details>
+<summary>Legacy models</summary>
+
 |Model |size |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |
 | ------ |:---: | :---: |:---: |:---: | :---: | :----: |
 |[YOLOX-s](./exps/default/yolox_s.py) |640 |39.6 |9.8 |9.0 | 26.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) |
@@ -32,11 +45,25 @@ This repo is an implementation of PyTorch version YOLOX, there is also a [MegEng
 |[YOLOX-x](./exps/default/yolox_x.py) |640 |**51.2** | 17.3 |99.1 |281.9 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) |
 |[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.4 | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) |

+</details>
+
 #### Light Models.
+
 |Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |
 | ------ |:---: | :---: |:---: |:---: | :---: |
+|[YOLOX-Nano](./exps/default/nano.py) |416 |25.8 | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.pth) |
+|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416 |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.pth) |
+
+<details>
+<summary>Legacy models</summary>
+
+|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |
+| ------ |:---: | :---: |:---: |:---: | :---: |
+|[YOLOX-Nano](./exps/default/nano.py) |416 |25.3 | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.pth) |
+|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416 |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.pth) |
+
+</details>

 ## Quick Start
@@ -50,15 +77,8 @@ cd YOLOX
 pip3 install -U pip && pip3 install -r requirements.txt
 pip3 install -v -e . # or python3 setup.py develop
 ```
-Step2. Install [apex](https://github.com/NVIDIA/apex).
-
-```shell
-# skip this step if you don't want to train model.
-git clone https://github.com/NVIDIA/apex
-cd apex
-pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
-```
-Step3. Install [pycocotools](https://github.com/cocodataset/cocoapi).
+Step2. Install [pycocotools](https://github.com/cocodataset/cocoapi).

 ```shell
 pip3 install cython; pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
@@ -100,7 +120,7 @@ ln -s /path/to/your/COCO ./datasets/COCO
 Step2. Reproduce our results on COCO by specifying -n:

 ```shell
-python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o
+python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache]
                          yolox-m
                          yolox-l
                          yolox-x
@@ -108,10 +128,11 @@ python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o
 * -d: number of gpu devices
 * -b: total batch size, the recommended number for -b is num-gpu * 8
 * --fp16: mixed precision training
+* --cache: caching imgs into RAM to accelerate training, which needs large system RAM.

 When using -f, the above commands are equivalent to:
 ```shell
-python tools/train.py -f exps/default/yolox_s.py -d 8 -b 64 --fp16 -o
+python tools/train.py -f exps/default/yolox_s.py -d 8 -b 64 --fp16 -o [--cache]
                          exps/default/yolox_m.py
                          exps/default/yolox_l.py
                          exps/default/yolox_x.py
demo/MegEngine/cpp/yolox.cpp
CHANGED
@@ -35,17 +35,14 @@ cv::Mat static_resize(cv::Mat &img) {
 }

 void blobFromImage(cv::Mat &img, float *blob_data) {
-    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
     int channels = 3;
     int img_h = img.rows;
     int img_w = img.cols;
-    std::vector<float> mean = {0.485, 0.456, 0.406};
-    std::vector<float> std = {0.229, 0.224, 0.225};
     for (size_t c = 0; c < channels; c++) {
         for (size_t h = 0; h < img_h; h++) {
             for (size_t w = 0; w < img_w; w++) {
                 blob_data[c * img_w * img_h + h * img_w + w] =
+                    (float)img.at<cv::Vec3b>(h, w)[c];
             }
         }
     }
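For reference, the C++ demos above now build the blob from raw 0-255 BGR pixel values: the BGR→RGB swap and the mean/std normalization are gone. A minimal NumPy sketch of the matching Python-side preprocessing is shown below; this is my reconstruction for illustration, not the repo's exact `preproc` implementation (the 114 padding value is taken from the ncnn demo's `copy_make_border` call further down).

```python
import cv2
import numpy as np

def preprocess(img, input_size, swap=(2, 0, 1)):
    # Letterbox-resize onto a gray (114) canvas and keep raw 0-255 values;
    # no BGR->RGB conversion and no mean/std normalization anymore.
    padded = np.full((input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.uint8)
    padded[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized
    padded = padded.transpose(swap)  # HWC -> CHW
    return np.ascontiguousarray(padded, dtype=np.float32), r
```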
demo/MegEngine/python/demo.py
CHANGED
@@ -107,8 +107,6 @@ class Predictor(object):
         self.confthre = confthre
         self.nmsthre = nmsthre
         self.test_size = test_size
-        self.rgb_means = (0.485, 0.456, 0.406)
-        self.std = (0.229, 0.224, 0.225)

     def inference(self, img):
         img_info = {"id": 0}
@@ -125,7 +123,7 @@ class Predictor(object):
         img_info["width"] = width
         img_info["raw_img"] = img

+        img, ratio = preprocess(img, self.test_size)
         img_info["ratio"] = ratio
         img = F.expand_dims(mge.tensor(img), 0)
demo/ONNXRuntime/README.md
CHANGED
@@ -6,13 +6,13 @@ This doc introduces how to convert your pytorch model into onnx, and how to run

 | Model | Parameters | GFLOPs | Test Size | mAP | Weights |
 |:------| :----: | :----: | :---: | :---: | :---: |
+| YOLOX-Nano | 0.91M | 1.08 | 416x416 | 25.8 |[github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.onnx) |
+| YOLOX-Tiny | 5.06M | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.onnx) |
+| YOLOX-S | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx) |
+| YOLOX-M | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.onnx) |
+| YOLOX-L | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.onnx) |
+| YOLOX-Darknet53| 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.onnx) |
+| YOLOX-X | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox.onnx) |


 ### Convert Your Model to ONNX
demo/ONNXRuntime/onnx_inference.py
CHANGED
@@ -64,9 +64,7 @@ if __name__ == '__main__':

     input_shape = tuple(map(int, args.input_shape.split(',')))
     origin_img = cv2.imread(args.image_path)
-    mean = (0.485, 0.456, 0.406)
-    std = (0.229, 0.224, 0.225)
-    img, ratio = preprocess(origin_img, input_shape, mean, std)
+    img, ratio = preprocess(origin_img, input_shape)

     session = onnxruntime.InferenceSession(args.model)
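A hedged usage sketch of the simplified ONNX demo flow after this change; the model and image paths are placeholders, and `preprocess` is assumed to be the updated helper that no longer takes mean/std arguments.

```python
import cv2
import onnxruntime

origin_img = cv2.imread("assets/dog.jpg")              # placeholder image path
img, ratio = preprocess(origin_img, (640, 640))        # no mean/std arguments anymore

session = onnxruntime.InferenceSession("yolox_s.onnx")  # placeholder model path
input_name = session.get_inputs()[0].name
# Add the batch dimension; the blob already holds raw float32 pixel values.
outputs = session.run(None, {input_name: img[None, :, :, :]})
```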
demo/OpenVINO/cpp/README.md
CHANGED
@@ -6,13 +6,13 @@ This toturial includes a C++ demo for OpenVINO, as well as some converted models

 | Model | Parameters | GFLOPs | Test Size | mAP | Weights |
 |:------| :----: | :----: | :---: | :---: | :---: |
+| [YOLOX-Nano](../../../exps/default/nano.py) | 0.91M | 1.08 | 416x416 | 25.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano_openvino.tar.gz) |
+| [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny_openvino.tar.gz) |
+| [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_openvino.tar.gz) |
+| [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m_openvino.tar.gz) |
+| [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l_openvino.tar.gz) |
+| [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_dark_openvino.tar.gz) |
+| [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x_openvino.tar.gz) |

 ## Install OpenVINO Toolkit

@@ -72,9 +72,11 @@ source ~/.bashrc
 ```
 For example:
 ```shell
+python3 mo.py --input_model yolox_tiny.onnx --input_shape [1,3,416,416] --data_type FP16
 ```

+Make sure the input shape is consistent with [those](yolox_openvino.cpp#L24-L25) in the cpp file.
+
 ## Build

 ### Linux
demo/OpenVINO/cpp/yolox_openvino.cpp
CHANGED
@@ -37,12 +37,9 @@ cv::Mat static_resize(cv::Mat& img) {
 }

 void blobFromImage(cv::Mat& img, Blob::Ptr& blob){
-    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
     int channels = 3;
     int img_h = img.rows;
     int img_w = img.cols;
-    std::vector<float> mean = {0.485, 0.456, 0.406};
-    std::vector<float> std = {0.229, 0.224, 0.225};
     InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
     if (!mblob)
     {
@@ -61,7 +58,7 @@ void blobFromImage(cv::Mat& img, Blob::Ptr& blob){
             for (size_t w = 0; w < img_w; w++)
             {
                 blob_data[c * img_w * img_h + h * img_w + w] =
+                    (float)img.at<cv::Vec3b>(h, w)[c];
             }
         }
     }
@@ -513,7 +510,6 @@ int main(int argc, char* argv[]) {
     auto moutputHolder = moutput->rmap();
     const float* net_pred = moutputHolder.as<const PrecisionTrait<Precision::FP32>::value_type*>();

-    const int image_size = 416;
     int img_w = image.cols;
     int img_h = image.rows;
     float scale = std::min(INPUT_W / (image.cols*1.0), INPUT_H / (image.rows*1.0));
demo/OpenVINO/python/README.md
CHANGED
@@ -6,13 +6,13 @@ This toturial includes a Python demo for OpenVINO, as well as some converted mod

 | Model | Parameters | GFLOPs | Test Size | mAP | Weights |
 |:------| :----: | :----: | :---: | :---: | :---: |
+| [YOLOX-Nano](../../../exps/default/nano.py) | 0.91M | 1.08 | 416x416 | 25.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano_openvino.tar.gz) |
+| [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny_openvino.tar.gz) |
+| [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_openvino.tar.gz) |
+| [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m_openvino.tar.gz) |
+| [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l_openvino.tar.gz) |
+| [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_dark_openvino.tar.gz) |
+| [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x_openvino.tar.gz) |

 ## Install OpenVINO Toolkit

demo/OpenVINO/python/openvino_inference.py
CHANGED
@@ -119,9 +119,7 @@ def main():
     # ---------------------------Step 6. Prepare input---------------------------------------------------------------------
     origin_img = cv2.imread(args.input)
     _, _, h, w = net.input_info[input_blob].input_data.shape
-    mean = (0.485, 0.456, 0.406)
-    std = (0.229, 0.224, 0.225)
-    image, ratio = preprocess(origin_img, (h, w), mean, std)
+    image, ratio = preprocess(origin_img, (h, w))

     # ---------------------------Step 7. Do inference----------------------------------------------------------------------
     log.info('Starting inference in synchronous mode')
demo/TensorRT/cpp/yolox.cpp
CHANGED
@@ -207,14 +207,10 @@ static void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, fl
 }

 float* blobFromImage(cv::Mat& img){
-    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
-
     float* blob = new float[img.total()*3];
     int channels = 3;
     int img_h = img.rows;
     int img_w = img.cols;
-    std::vector<float> mean = {0.485, 0.456, 0.406};
-    std::vector<float> std = {0.229, 0.224, 0.225};
     for (size_t c = 0; c < channels; c++)
     {
         for (size_t h = 0; h < img_h; h++)
@@ -222,7 +218,7 @@ float* blobFromImage(cv::Mat& img){
             for (size_t w = 0; w < img_w; w++)
             {
                 blob[c * img_w * img_h + h * img_w + w] =
+                    (float)img.at<cv::Vec3b>(h, w)[c];
             }
         }
     }
demo/ncnn/cpp/yolox.cpp
CHANGED
@@ -279,7 +279,7 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
         h = YOLOX_TARGET_SIZE;
         w = w * scale;
     }
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h);

     // pad to YOLOX_TARGET_SIZE rectangle
     int wpad = YOLOX_TARGET_SIZE - w;
@@ -289,13 +289,6 @@ static int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)
     // which means users don't need to extra padding info to decode boxes coordinate.
     ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);

-    // python 0-1 input tensor with rgb_means = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225)
-    // so for 0-255 input image, rgb_mean should multiply 255 and norm should div by std.
-    const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f};
-    const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)};
-
-    in_pad.substract_mean_normalize(mean_vals, norm_vals);
-
     ncnn::Extractor ex = yolox.create_extractor();

     ex.input("images", in_pad);
docs/model_zoo.md
CHANGED
@@ -2,17 +2,41 @@

 ## Standard Models.

+|Model |size |mAP<sup>val<br>0.5:0.95 |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |
+| ------ |:---: | :---: | :---: |:---: |:---: | :---: | :----: |
+|[YOLOX-s](./exps/default/yolox_s.py) |640 |40.5 |40.5 |9.8 |9.0 | 26.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth) |
+|[YOLOX-m](./exps/default/yolox_m.py) |640 |46.9 |47.2 |12.3 |25.3 |73.8| [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth) |
+|[YOLOX-l](./exps/default/yolox_l.py) |640 |47.7 |50.1 |14.5 |54.2| 155.6 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.pth) |
+|[YOLOX-x](./exps/default/yolox_x.py) |640 |51.1 |**51.5** | 17.3 |99.1 |281.9 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth) |
+|[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.7 | 48.0 | 11.1 |63.7 | 185.3 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.pth) |
+
+<details>
+<summary>Legacy models</summary>
+
 |Model |size |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |
 | ------ |:---: | :---: |:---: |:---: | :---: | :----: |
+|[YOLOX-s](./exps/default/yolox_s.py) |640 |39.6 |9.8 |9.0 | 26.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) |
+|[YOLOX-m](./exps/default/yolox_m.py) |640 |46.4 |12.3 |25.3 |73.8| [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERMTP7VFqrVBrXKMU7Vl4TcBQs0SUeCT7kvc-JdIbej4tQ?e=1MDo9y)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.pth) |
+|[YOLOX-l](./exps/default/yolox_l.py) |640 |50.0 |14.5 |54.2| 155.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EWA8w_IEOzBKvuueBqfaZh0BeoG5sVzR-XYbOJO4YlOkRw?e=wHWOBE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.pth) |
+|[YOLOX-x](./exps/default/yolox_x.py) |640 |**51.2** | 17.3 |99.1 |281.9 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) |
+|[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.4 | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) |
+
+</details>

 ## Light Models.

 |Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |
 | ------ |:---: | :---: |:---: |:---: | :---: |
+|[YOLOX-Nano](./exps/default/nano.py) |416 |25.8 | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.pth) |
+|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416 |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.pth) |
+
+<details>
+<summary>Legacy models</summary>
+
+|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |
+| ------ |:---: | :---: |:---: |:---: | :---: |
+|[YOLOX-Nano](./exps/default/nano.py) |416 |25.3 | 0.91 |1.08 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdcREey-krhLtdtSnxolxiUBjWMy6EFdiaO9bdOwZ5ygCQ?e=yQpdds)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.pth) |
+|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416 |32.8 | 5.06 |6.45 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EbZuinX5X1dJmNy8nqSRegABWspKw3QpXxuO82YSoFN1oQ?e=Q7V7XE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.pth) |
+
+</details>
docs/quick_run.md
CHANGED
@@ -10,15 +10,7 @@ cd YOLOX
 pip3 install -U pip && pip3 install -r requirements.txt
 pip3 install -v -e . # or python3 setup.py develop
 ```
-Step2. Install [apex](https://github.com/NVIDIA/apex).
-
-```shell
-# skip this step if you don't want to train model.
-git clone https://github.com/NVIDIA/apex
-cd apex
-pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
-```
-Step3. Install [pycocotools](https://github.com/cocodataset/cocoapi).
+Step2. Install [pycocotools](https://github.com/cocodataset/cocoapi).

 ```shell
 pip3 install cython; pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
@@ -54,7 +46,7 @@ ln -s /path/to/your/COCO ./datasets/COCO
 Step2. Reproduce our results on COCO by specifying -n:

 ```shell
-python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o
+python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache]
                          yolox-m
                          yolox-l
                          yolox-x
@@ -62,6 +54,7 @@ python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o
 * -d: number of gpu devices
 * -b: total batch size, the recommended number for -b is num-gpu * 8
 * --fp16: mixed precision training
+* --cache: caching imgs into RAM to accelerate training, which needs large system RAM.

 **Multi Machine Training**

@@ -72,7 +65,7 @@ We also support multi-nodes training. Just add the following args:
 When using -f, the above commands are equivalent to:

 ```shell
-python tools/train.py -f exps/default/yolox-s.py -d 8 -b 64 --fp16 -o
+python tools/train.py -f exps/default/yolox-s.py -d 8 -b 64 --fp16 -o [--cache]
                          exps/default/yolox-m.py
                          exps/default/yolox-l.py
                          exps/default/yolox-x.py
docs/train_custom_data.md
CHANGED
@@ -69,12 +69,13 @@ Except special cases, we always recommend to use our [COCO pretrained weights](h

 Once you get the Exp file and the COCO pretrained weights we provided, you can train your own model by the following below command:
 ```bash
-python tools/train.py -f /path/to/your/Exp/file -d 8 -b 64 --fp16 -o -c /path/to/the/pretrained/weights
+python tools/train.py -f /path/to/your/Exp/file -d 8 -b 64 --fp16 -o -c /path/to/the/pretrained/weights [--cache]
 ```
+* --cache: we now support RAM caching to speed up training! Make sure you have enough system RAM when adopting it.

 or take the `YOLOX-S` VOC training for example:
 ```bash
-python tools/train.py -f exps/example/yolox_voc/yolox_voc_s.py -d 8 -b 64 --fp16 -o -c /path/to/yolox_s.pth
+python tools/train.py -f exps/example/yolox_voc/yolox_voc_s.py -d 8 -b 64 --fp16 -o -c /path/to/yolox_s.pth [--cache]
 ```

 For example:
docs/updates_note.md
ADDED
@@ -0,0 +1,55 @@
+# Updates notes
+
+## 【2021/08/19】
+
+* Support image caching for faster training, which requires large system RAM.
+* Remove the dependence of apex and support torch amp training.
+* Optimize the preprocessing for faster training.
+* Replace the older distort augmentation with new HSV aug for faster training and better performance.
+
+### 2x Faster training
+
+We optimize the data preprocess and support image caching with the `--cache` flag:
+
+```shell
+python tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache]
+                         yolox-m
+                         yolox-l
+                         yolox-x
+```
+* -d: number of gpu devices
+* -b: total batch size, the recommended number for -b is num-gpu * 8
+* --fp16: mixed precision training
+* --cache: caching imgs into RAM to accelerate training, which needs large system RAM.
+
+### Higher performance
+
+New models achieve **~1%** higher performance! See [Model_Zoo](model_zoo.md) for more details.
+
+### Support torch amp
+
+We now support torch.cuda.amp training and Apex is not used anymore.
+
+### Breaking changes
+
+We remove the normalization operation like -mean/std. This will make the old weights **incompatible**.
+
+If you still want to use old weights, you can add `--legacy` in demo and eval:
+
+```shell
+python tools/demo.py image -n yolox-s -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu] [--legacy]
+```
+
+and
+
+```shell
+python tools/eval.py -n yolox-s -c yolox_s.pth -b 64 -d 8 --conf 0.001 [--fp16] [--fuse] [--legacy]
+                        yolox-m
+                        yolox-l
+                        yolox-x
+```
+
+But for the deployment demos, we don't support the old weights anymore. Users can check out YOLOX version 0.1.0 to use legacy weights for deployment.
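The `--legacy` switch exists because the old pipeline scaled pixels to 0-1, converted BGR to RGB and applied the (0.485, 0.456, 0.406)/(0.229, 0.224, 0.225) normalization, all of which the new preprocessing drops. A minimal sketch of what a legacy branch would re-apply to a raw blob; the real logic lives in `yolox/data/data_augment.py` (changed in this PR but not shown here), so treat this as an illustration, not the exact implementation.

```python
import numpy as np

def apply_legacy_normalization(img_chw: np.ndarray) -> np.ndarray:
    """Re-apply the old normalization to a CHW float32 blob of raw 0-255 BGR pixels."""
    img = img_chw[::-1, :, :].copy()  # BGR -> RGB on the channel axis
    img /= 255.0                      # scale to 0-1
    img -= np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
    img /= np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)
    return img
```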
exps/default/nano.py
CHANGED
@@ -17,8 +17,9 @@ class Exp(MyExp):
         self.scale = (0.5, 1.5)
         self.random_size = (10, 20)
         self.test_size = (416, 416)
+        self.mosaic_prob = 0.5
         self.enable_mixup = False
+        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

     def get_model(self, sublinear=False):
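`mosaic_prob` (together with `mixup_prob` used in the VOC experiment below) presumably gates how often mosaic augmentation is applied per sample. A sketch of that idea, assuming the actual gating in `yolox/data/datasets/mosaicdetection.py` (changed in this PR but not shown) looks roughly like this:

```python
import random

def should_apply_mosaic(enable_mosaic: bool, mosaic_prob: float) -> bool:
    # Nano keeps mosaic enabled but, with mosaic_prob = 0.5, only uses it
    # for about half of the training samples.
    return enable_mosaic and random.random() < mosaic_prob
```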
exps/example/yolox_voc/yolox_voc_s.py
CHANGED
@@ -16,7 +16,7 @@ class Exp(MyExp):
         self.width = 0.50
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]

-    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
+    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
         from yolox.data import (
             VOCDetection,
             TrainTransform,
@@ -24,34 +24,36 @@ class Exp(MyExp):
             DataLoader,
             InfiniteSampler,
             MosaicDetection,
+            worker_init_reset_seed,
         )
-
-        dataset = VOCDetection(
-            data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"),
-            image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
-            img_size=self.input_size,
-            preproc=TrainTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-                max_labels=50,
-            ),
-        )
+        from yolox.utils import (
+            wait_for_the_master,
+            get_local_rank,
+        )
+        local_rank = get_local_rank()
+
+        with wait_for_the_master(local_rank):
+            dataset = VOCDetection(
+                data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"),
+                image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
+                img_size=self.input_size,
+                preproc=TrainTransform(max_labels=50),
+                cache=cache_img,
+            )

         dataset = MosaicDetection(
             dataset,
             mosaic=not no_aug,
             img_size=self.input_size,
-            preproc=TrainTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-                max_labels=120,
-            ),
+            preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
             scale=self.scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,
+            mosaic_prob=self.mosaic_prob,
+            mixup_prob=self.mixup_prob,
         )

         self.dataset = dataset
@@ -67,27 +69,27 @@ class Exp(MyExp):
             sampler=sampler,
             batch_size=batch_size,
             drop_last=False,
-            input_dimension=self.input_size,
             mosaic=not no_aug,
         )

         dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
         dataloader_kwargs["batch_sampler"] = batch_sampler
+
+        # Make sure each process has different random seed, especially for 'fork' method
+        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
+
         train_loader = DataLoader(self.dataset, **dataloader_kwargs)

         return train_loader

-    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
+    def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
         from yolox.data import VOCDetection, ValTransform

         valdataset = VOCDetection(
             data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"),
             image_sets=[('2007', 'test')],
             img_size=self.test_size,
-            preproc=ValTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-            ),
+            preproc=ValTransform(legacy=legacy),
         )

         if is_distributed:
@@ -108,10 +110,10 @@ class Exp(MyExp):

         return val_loader

-    def get_evaluator(self, batch_size, is_distributed, testdev=False):
+    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
         from yolox.evaluators import VOCEvaluator

-        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev)
+        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
         evaluator = VOCEvaluator(
             dataloader=val_loader,
             img_size=self.test_size,
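`wait_for_the_master` and `get_local_rank` come from `yolox/utils/dist.py` (changed in this PR but not shown above). The intent is that only the local master rank builds or caches the dataset while the other ranks block until it is done. A minimal sketch of such a context manager, assuming a plain `torch.distributed` barrier; the repo's actual implementation may differ.

```python
from contextlib import contextmanager

import torch.distributed as dist

@contextmanager
def wait_for_the_master(local_rank: int):
    # Non-master ranks block here until the master has finished (e.g. building the image cache).
    if local_rank > 0 and dist.is_available() and dist.is_initialized():
        dist.barrier()
    yield
    # The master then waits so every rank leaves the block together.
    if local_rank == 0 and dist.is_available() and dist.is_initialized():
        dist.barrier()
```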
tools/demo.py
CHANGED
@@ -11,7 +11,7 @@ import cv2

 import torch

+from yolox.data.data_augment import ValTransform
 from yolox.data.datasets import COCO_CLASSES
 from yolox.exp import get_exp
 from yolox.utils import fuse_model, get_model_info, postprocess, vis
@@ -52,8 +52,8 @@ def make_parser():
         type=str,
         help="device to run our model, can either be cpu or gpu",
     )
+    parser.add_argument("--conf", default=0.3, type=float, help="test conf")
+    parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold")
     parser.add_argument("--tsize", default=None, type=int, help="test img size")
     parser.add_argument(
         "--fp16",
@@ -62,6 +62,13 @@ def make_parser():
         action="store_true",
         help="Adopting mix precision evaluating.",
     )
+    parser.add_argument(
+        "--legacy",
+        dest="legacy",
+        default=False,
+        action="store_true",
+        help="To be compatible with older versions",
+    )
     parser.add_argument(
         "--fuse",
         dest="fuse",
@@ -99,6 +106,7 @@ class Predictor(object):
         trt_file=None,
         decoder=None,
         device="cpu",
+        legacy=False,
     ):
         self.model = model
         self.cls_names = cls_names
@@ -108,6 +116,7 @@ class Predictor(object):
         self.nmsthre = exp.nmsthre
         self.test_size = exp.test_size
         self.device = device
+        self.preproc = ValTransform(legacy=legacy)
         if trt_file is not None:
             from torch2trt import TRTModule
@@ -117,8 +126,6 @@ class Predictor(object):
             x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
             self.model(x)
             self.model = model_trt
-        self.rgb_means = (0.485, 0.456, 0.406)
-        self.std = (0.229, 0.224, 0.225)

     def inference(self, img):
         img_info = {"id": 0}
@@ -133,8 +140,10 @@ class Predictor(object):
         img_info["width"] = width
         img_info["raw_img"] = img

+        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
         img_info["ratio"] = ratio
+
+        img, _ = self.preproc(img, None, self.test_size)
         img = torch.from_numpy(img).unsqueeze(0)
         if self.device == "gpu":
             img = img.cuda()
@@ -229,6 +238,7 @@ def main(exp, args):
     file_name = os.path.join(exp.output_dir, args.experiment_name)
     os.makedirs(file_name, exist_ok=True)

+    vis_folder = None
     if args.save_result:
         vis_folder = os.path.join(file_name, "vis_res")
         os.makedirs(vis_folder, exist_ok=True)
@@ -280,7 +290,7 @@ def main(exp, args):
         trt_file = None
         decoder = None

-    predictor = Predictor(model, exp, COCO_CLASSES, trt_file, decoder, args.device)
+    predictor = Predictor(model, exp, COCO_CLASSES, trt_file, decoder, args.device, args.legacy)
     current_time = time.localtime()
     if args.demo == "image":
         image_demo(predictor, vis_folder, args.path, current_time, args.save_result)
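Because the resize ratio is now computed from the raw image before `ValTransform` runs, `img_info["ratio"]` is what maps predictions back to the original image. A hedged sketch of that consumer side (the exact visualization code is not part of this diff, and the box layout is an assumption):

```python
import numpy as np

def rescale_boxes(boxes_xyxy: np.ndarray, ratio: float) -> np.ndarray:
    # Boxes are predicted on the letterboxed input produced by ValTransform;
    # dividing by the stored ratio maps them back to the original image size.
    return boxes_xyxy / ratio
```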
tools/eval.py
CHANGED
@@ -75,6 +75,13 @@ def make_parser():
         action="store_true",
         help="Using TensorRT model for testing.",
     )
+    parser.add_argument(
+        "--legacy",
+        dest="legacy",
+        default=False,
+        action="store_true",
+        help="To be compatible with older versions",
+    )
     parser.add_argument(
         "--test",
         dest="test",
@@ -135,7 +142,7 @@ def main(exp, args, num_gpu):
     logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
     logger.info("Model Structure:\n{}".format(str(model)))

-    evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test)
+    evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test, args.legacy)

     torch.cuda.set_device(rank)
     model.cuda(rank)
tools/train.py
CHANGED
@@ -12,7 +12,7 @@ import torch.backends.cudnn as cudnn

 from yolox.core import Trainer, launch
 from yolox.exp import get_exp
-from yolox.utils import configure_nccl, configure_omp
+from yolox.utils import configure_nccl, configure_omp, get_num_devices


 def make_parser():
@@ -65,6 +65,13 @@ def make_parser():
         action="store_true",
         help="Adopting mix precision training.",
     )
+    parser.add_argument(
+        "--cache",
+        dest="cache",
+        default=False,
+        action="store_true",
+        help="Caching imgs to RAM for fast training.",
+    )
     parser.add_argument(
         "-o",
         "--occupy",
@@ -111,8 +118,8 @@ if __name__ == "__main__":
     if not args.experiment_name:
         args.experiment_name = exp.exp_name

+    num_gpu = get_num_devices() if args.devices is None else args.devices
+    assert num_gpu <= get_num_devices()

     dist_url = "auto" if args.dist_url is None else args.dist_url
     launch(
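`get_num_devices` is added to `yolox/utils/dist.py` (+32 lines, not shown in this section). A plausible sketch of what such a helper does, assuming it honours `CUDA_VISIBLE_DEVICES` before falling back to `torch.cuda.device_count()`; the repo's real implementation may differ in details.

```python
import os

import torch

def get_num_devices() -> int:
    # Respect an explicit CUDA_VISIBLE_DEVICES list if the user set one,
    # otherwise count whatever CUDA devices torch can see.
    gpu_list = os.getenv("CUDA_VISIBLE_DEVICES", None)
    if gpu_list is not None:
        return len(gpu_list.split(","))
    return torch.cuda.device_count()
```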
yolox/core/launch.py
CHANGED
@@ -5,6 +5,7 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 # Copyright (c) Megvii, Inc. and its affiliates.

+import sys
 from datetime import timedelta
 from loguru import logger

@@ -61,18 +62,37 @@ def launch(
     # TODO prctl in spawned processes

     if dist_url == "auto":
+        assert (
+            num_machines == 1
+        ), "dist_url=auto cannot work with distributed training."
         port = _find_free_port()
         dist_url = f"tcp://127.0.0.1:{port}"

+    start_method = "spawn"
+    cache = vars(args[1]).get("cache", False)
+
+    # To use numpy memmap for caching image into RAM, we have to use fork method
+    if cache:
+        assert sys.platform != "win32", (
+            "As Windows platform doesn't support fork method, "
+            "do not add --cache in your training command."
+        )
+        start_method = "fork"
+
+    mp.start_processes(
         _distributed_worker,
         nprocs=num_gpus_per_machine,
         args=(
+            main_func,
+            world_size,
+            num_gpus_per_machine,
+            machine_rank,
+            backend,
+            dist_url,
+            args,
         ),
         daemon=False,
+        start_method=start_method,
     )
     else:
         main_func(*args)
@@ -89,7 +109,9 @@ def _distributed_worker(
     args,
     timeout=DEFAULT_TIMEOUT,
 ):
+    assert (
+        torch.cuda.is_available()
+    ), "cuda is not available. Please check your installation."
     global_rank = machine_rank * num_gpus_per_machine + local_rank
     logger.info("Rank {} initialization finished.".format(global_rank))
     try:
@@ -108,7 +130,9 @@ def _distributed_worker(
     assert comm._LOCAL_PROCESS_GROUP is None
     num_machines = world_size // num_gpus_per_machine
     for i in range(num_machines):
-        ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
+        ranks_on_i = list(
+            range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)
+        )
         pg = dist.new_group(ranks_on_i)
         if i == machine_rank:
             comm._LOCAL_PROCESS_GROUP = pg
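The launcher switches from a fixed `spawn` start method to `fork` when `--cache` is set because a cache (e.g. a `numpy` memmap or large array) built in the parent is inherited for free by forked children, whereas `spawn` re-imports the module and would rebuild or re-pickle it per process. A toy sketch of that difference, not the repo's caching code; `CACHE` is a stand-in for the RAM image cache.

```python
import multiprocessing as mp

import numpy as np

CACHE = np.zeros((1000, 1000), dtype=np.uint8)  # stands in for the cached images

def worker(_):
    # With the "fork" start method the child inherits CACHE without copying or
    # re-loading it; with "spawn" this module-level state would be rebuilt.
    return int(CACHE[0, 0])

if __name__ == "__main__":
    ctx = mp.get_context("fork")  # not available on Windows, hence the assert in launch()
    with ctx.Pool(2) as pool:
        print(pool.map(worker, range(2)))
```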
yolox/core/trainer.py
CHANGED
@@ -7,9 +7,8 @@ import os
 import time
 from loguru import logger

-import apex
 import torch
+from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.utils.tensorboard import SummaryWriter

 from yolox.data import DataPrefetcher
@@ -41,6 +40,7 @@ class Trainer:
         # training related attr
         self.max_epoch = exp.max_epoch
         self.amp_training = args.fp16
+        self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)
         self.is_distributed = get_world_size() > 1
         self.rank = get_rank()
         self.local_rank = get_local_rank()
@@ -94,18 +94,18 @@ class Trainer:
         inps = inps.to(self.data_type)
         targets = targets.to(self.data_type)
         targets.requires_grad = False
+        inps, targets = self.exp.preprocess(inps, targets, self.input_size)
         data_end_time = time.time()

+        with torch.cuda.amp.autocast(enabled=self.amp_training):
+            outputs = self.model(inps, targets)
+
         loss = outputs["total_loss"]

         self.optimizer.zero_grad()
+        self.scaler.scale(loss).backward()
+        self.scaler.step(self.optimizer)
+        self.scaler.update()

         if self.use_model_ema:
             self.ema_model.update(self.model)
@@ -137,9 +137,6 @@ class Trainer:
         # solver related init
         self.optimizer = self.exp.get_optimizer(self.args.batch_size)

-        if self.amp_training:
-            model, optimizer = amp.initialize(model, self.optimizer, opt_level="O1")
-
         # value of epoch will be set in `resume_train`
         model = self.resume_train(model)

@@ -149,6 +146,7 @@ class Trainer:
             batch_size=self.args.batch_size,
             is_distributed=self.is_distributed,
             no_aug=self.no_aug,
         )
         logger.info("init prefetcher, this might take one minute or less...")
         self.prefetcher = DataPrefetcher(self.train_loader)
@@ -162,9 +160,7 @@ class Trainer:
             occupy_mem(self.local_rank)

         if self.is_distributed:
-            # from torch.nn.parallel import DistributedDataParallel as DDP
-            # model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)

         if self.use_model_ema:
             self.ema_model = ModelEMA(model, 0.9998)
@@ -274,8 +270,6 @@ class Trainer:
             model.load_state_dict(ckpt["model"])
             self.optimizer.load_state_dict(ckpt["optimizer"])
             # resume the training states variables
-            if self.amp_training and "amp" in ckpt:
-                amp.load_state_dict(ckpt["amp"])
             start_epoch = (
                 self.args.start_epoch - 1
                 if self.args.start_epoch is not None
@@ -327,10 +321,6 @@ class Trainer:
                 "model": save_model.state_dict(),
                 "optimizer": self.optimizer.state_dict(),
             }
-            if self.amp_training:
-                # save amp state according to
-                # https://nvidia.github.io/apex/amp.html#checkpointing
-                ckpt_state["amp"] = amp.state_dict()
             save_checkpoint(
                 ckpt_state,
                 update_best_ckpt,
|
148 |
no_aug=self.no_aug,
|
149 |
+
cache_img=self.args.cache,
|
150 |
)
|
151 |
logger.info("init prefetcher, this might take one minute or less...")
|
152 |
self.prefetcher = DataPrefetcher(self.train_loader)
|
|
|
160 |
occupy_mem(self.local_rank)
|
161 |
|
162 |
if self.is_distributed:
|
163 |
+
model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)
|
|
|
|
|
164 |
|
165 |
if self.use_model_ema:
|
166 |
self.ema_model = ModelEMA(model, 0.9998)
|
|
|
270 |
model.load_state_dict(ckpt["model"])
|
271 |
self.optimizer.load_state_dict(ckpt["optimizer"])
|
272 |
# resume the training states variables
|
|
|
|
|
273 |
start_epoch = (
|
274 |
self.args.start_epoch - 1
|
275 |
if self.args.start_epoch is not None
|
|
|
321 |
"model": save_model.state_dict(),
|
322 |
"optimizer": self.optimizer.state_dict(),
|
323 |
}
|
|
|
|
|
|
|
|
|
324 |
save_checkpoint(
|
325 |
ckpt_state,
|
326 |
update_best_ckpt,
|
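The Trainer drops apex in favour of native `torch.cuda.amp`: a `GradScaler` plus an `autocast` context around the forward pass. The sketch below shows that generic pattern in isolation; `model`, `optimizer`, and `loader` are placeholders, not YOLOX objects.

```python
# Generic torch.cuda.amp training step, following the pattern adopted above.
import torch


def train_amp(model, optimizer, loader, fp16=True, device="cuda"):
    scaler = torch.cuda.amp.GradScaler(enabled=fp16)
    model.train()
    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        # forward runs in mixed precision; loss scaling protects small gradients
        with torch.cuda.amp.autocast(enabled=fp16):
            loss = model(inputs, targets)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
```

Because the scaler holds the dynamic loss-scale state, checkpoints no longer need the separate `amp` entry that the apex path saved.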
yolox/data/__init__.py
CHANGED
@@ -4,6 +4,6 @@

 from .data_augment import TrainTransform, ValTransform
 from .data_prefetcher import DataPrefetcher
-from .dataloading import DataLoader, get_yolox_datadir
+from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed
 from .datasets import *
 from .samplers import InfiniteSampler, YoloBatchSampler
yolox/data/data_augment.py
CHANGED
@@ -140,36 +140,6 @@ def random_perspective(
     return img, targets


-def _distort(image):
-    def _convert(image, alpha=1, beta=0):
-        tmp = image.astype(float) * alpha + beta
-        tmp[tmp < 0] = 0
-        tmp[tmp > 255] = 255
-        image[:] = tmp
-
-    image = image.copy()
-
-    if random.randrange(2):
-        _convert(image, beta=random.uniform(-32, 32))
-
-    if random.randrange(2):
-        _convert(image, alpha=random.uniform(0.5, 1.5))
-
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-
-    if random.randrange(2):
-        tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
-        tmp %= 180
-        image[:, :, 0] = tmp
-
-    if random.randrange(2):
-        _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
-
-    image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
-
-    return image
-
-
 def _mirror(image, boxes):
     _, width, _ = image.shape
     if random.randrange(2):
@@ -179,36 +149,27 @@ def _mirror(image, boxes):
     return image, boxes


-def preproc(img, input_size, mean, std, swap=(2, 0, 1)):
-    if len(img.shape) == 3:
-        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114
+def preproc(img, input_size, swap=(2, 0, 1)):
+    if len(img.shape) == 3:
+        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
     else:
-        padded_img = np.ones(input_size) * 114
+        padded_img = np.ones(input_size, dtype=np.uint8) * 114

     r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
     resized_img = cv2.resize(
         img,
         (int(img.shape[1] * r), int(img.shape[0] * r)),
         interpolation=cv2.INTER_LINEAR,
-    ).astype(np.float32)
+    ).astype(np.uint8)
     padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

-    padded_img = padded_img[:, :, ::-1]
-    padded_img /= 255.0
-    if mean is not None:
-        padded_img -= mean
-    if std is not None:
-        padded_img /= std
     padded_img = padded_img.transpose(swap)
     padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
     return padded_img, r


 class TrainTransform:
-    def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100):
-        self.means = rgb_means
-        self.std = std
-        self.p = p
+    def __init__(self, max_labels=50):
         self.max_labels = max_labels

     def __call__(self, image, targets, input_dim):
@@ -216,8 +177,7 @@ class TrainTransform:
         labels = targets[:, 4].copy()
         if len(boxes) == 0:
             targets = np.zeros((self.max_labels, 5), dtype=np.float32)
-            image, r_o = preproc(image, input_dim, self.means, self.std)
-            image = np.ascontiguousarray(image, dtype=np.float32)
+            image, r_o = preproc(image, input_dim)
             return image, targets

         image_o = image.copy()
@@ -228,10 +188,10 @@ class TrainTransform:
         # bbox_o: [xyxy] to [c_x,c_y,w,h]
         boxes_o = xyxy2cxcywh(boxes_o)

-        image_t = _distort(image)
-        image_t, boxes = _mirror(image_t, boxes)
+        augment_hsv(image)
+        image_t, boxes = _mirror(image, boxes)
         height, width, _ = image_t.shape
-        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
+        image_t, r_ = preproc(image_t, input_dim)
         # boxes [xyxy] 2 [cx,cy,w,h]
         boxes = xyxy2cxcywh(boxes)
         boxes *= r_
@@ -241,7 +201,7 @@ class TrainTransform:
         labels_t = labels[mask_b]

         if len(boxes_t) == 0:
-            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
+            image_t, r_o = preproc(image_o, input_dim)
             boxes_o *= r_o
             boxes_t = boxes_o
             labels_t = labels_o
@@ -254,7 +214,6 @@ class TrainTransform:
             : self.max_labels
         ]
         padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
-        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
         return image_t, padded_labels


@@ -276,12 +235,16 @@ class ValTransform:
         data
     """

-    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)):
-        self.means = rgb_means
+    def __init__(self, swap=(2, 0, 1), legacy=False):
         self.swap = swap
-        self.std = std
+        self.legacy = legacy

     # assume input is cv2 img for now
     def __call__(self, img, res, input_size):
-        img, _ = preproc(img, input_size, self.means, self.std, self.swap)
+        img, _ = preproc(img, input_size, self.swap)
+        if self.legacy:
+            img = img[::-1, :, :].copy()
+            img /= 255.0
+            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
+            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        return img, np.zeros((1, 5))
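After this change `preproc` is a plain letterbox resize-and-pad that keeps uint8 pixel values on the 0-255 scale; the old BGR flip and ImageNet mean/std normalization survive only behind `ValTransform(legacy=True)`. A small sanity-check sketch of the new behaviour, using a dummy image (the expected values in the comments follow from the code above):

```python
# Quick check of the new letterbox behaviour on a dummy image.
import numpy as np
from yolox.data.data_augment import preproc

img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
padded, r = preproc(img, (416, 416))
print(padded.shape)  # (3, 416, 416): CHW float32, still on the 0-255 scale
print(r)             # resize ratio, min(416/480, 416/640) = 0.65
```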
yolox/data/data_prefetcher.py
CHANGED
@@ -2,12 +2,7 @@
 # -*- coding:utf-8 -*-
 # Copyright (c) Megvii, Inc. and its affiliates.

-import random
-
 import torch
-import torch.distributed as dist
-
-from yolox.utils import synchronize


 class DataPrefetcher:
@@ -54,24 +49,3 @@ class DataPrefetcher:
     @staticmethod
     def _record_stream_for_image(input):
         input.record_stream(torch.cuda.current_stream())
-
-
-def random_resize(data_loader, exp, epoch, rank, is_distributed):
-    tensor = torch.LongTensor(1).cuda()
-    if is_distributed:
-        synchronize()
-
-    if rank == 0:
-        if epoch > exp.max_epoch - 10:
-            size = exp.input_size
-        else:
-            size = random.randint(*exp.random_size)
-            size = int(32 * size)
-        tensor.fill_(size)
-
-    if is_distributed:
-        synchronize()
-        dist.broadcast(tensor, 0)
-
-    input_size = data_loader.change_input_dim(multiple=tensor.item(), random_range=None)
-    return input_size
yolox/data/dataloading.py
CHANGED
@@ -4,6 +4,9 @@

 import os
 import random
+import uuid
+
+import numpy as np

 import torch
 from torch.utils.data.dataloader import DataLoader as torchDataLoader
@@ -32,41 +35,6 @@ class DataLoader(torchDataLoader):
     See :class:`torch.utils.data.DataLoader` for more information on the arguments.
     Check more on the following website:
     https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py
-
-    Note:
-        This dataloader only works with :class:`lightnet.data.Dataset` based datasets.
-
-    Example:
-        >>> class CustomSet(ln.data.Dataset):
-        ...     def __len__(self):
-        ...         return 4
-        ...     @ln.data.Dataset.resize_getitem
-        ...     def __getitem__(self, index):
-        ...         # Should return (image, anno) but here we return (input_dim,)
-        ...         return (self.input_dim,)
-        >>> dl = ln.data.DataLoader(
-        ...     CustomSet((200,200)),
-        ...     batch_size = 2,
-        ...     collate_fn = ln.data.list_collate  # We want the data to be grouped as a list
-        ... )
-        >>> dl.dataset.input_dim  # Default input_dim
-        (200, 200)
-        >>> for d in dl:
-        ...     d
-        [[(200, 200), (200, 200)]]
-        [[(200, 200), (200, 200)]]
-        >>> dl.change_input_dim(320, random_range=None)
-        (320, 320)
-        >>> for d in dl:
-        ...     d
-        [[(320, 320), (320, 320)]]
-        [[(320, 320), (320, 320)]]
-        >>> dl.change_input_dim((480, 320), random_range=None)
-        (480, 320)
-        >>> for d in dl:
-        ...     d
-        [[(480, 320), (480, 320)]]
-        [[(480, 320), (480, 320)]]
     """

     def __init__(self, *args, **kwargs):
@@ -120,46 +88,6 @@ class DataLoader(torchDataLoader):
     def close_mosaic(self):
         self.batch_sampler.mosaic = False

-    def change_input_dim(self, multiple=32, random_range=(10, 19)):
-        """This function will compute a new size and update it on the next mini_batch.
-
-        Args:
-            multiple (int or tuple, optional): values to multiply the randomly generated range by.
-                Default **32**
-            random_range (tuple, optional): This (min, max) tuple sets the range
-                for the randomisation; Default **(10, 19)**
-
-        Return:
-            tuple: width, height tuple with new dimension
-
-        Note:
-            The new size is generated as follows: |br|
-            First we compute a random integer inside ``[random_range]``.
-            We then multiply that number with the ``multiple`` argument,
-            which gives our final new input size. |br|
-            If ``multiple`` is an integer we generate a square size. If you give a tuple
-            of **(width, height)**, the size is computed
-            as :math:`rng * multiple[0], rng * multiple[1]`.
-
-        Note:
-            You can set the ``random_range`` argument to **None** to set
-            an exact size of multiply. |br|
-            See the example above for how this works.
-        """
-        if random_range is None:
-            size = 1
-        else:
-            size = random.randint(*random_range)
-
-        if isinstance(multiple, int):
-            size = (size * multiple, size * multiple)
-        else:
-            size = (size * multiple[0], size * multiple[1])
-
-        self.batch_sampler.new_input_dim = size
-
-        return size
-

 def list_collate(batch):
     """
@@ -176,3 +104,10 @@ def list_collate(batch):
             items[i] = default_collate(items[i])

     return items
+
+
+def worker_init_reset_seed(worker_id):
+    seed = uuid.uuid4().int % 2**32
+    random.seed(seed)
+    torch.set_rng_state(torch.manual_seed(seed).get_state())
+    np.random.seed(seed)
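`worker_init_reset_seed` gives each dataloader worker an independent seed, which matters once workers are started with `fork` and would otherwise inherit identical RNG state. A hedged usage sketch with a toy dataset (the `NoiseDataset` name is illustrative):

```python
# Sketch: wiring worker_init_reset_seed into a vanilla DataLoader.
import numpy as np
from torch.utils.data import DataLoader, Dataset
from yolox.data import worker_init_reset_seed


class NoiseDataset(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        # without per-worker reseeding, forked workers can emit identical noise
        return np.random.rand(2)


loader = DataLoader(
    NoiseDataset(), batch_size=2, num_workers=2,
    worker_init_fn=worker_init_reset_seed,
)
for batch in loader:
    print(batch)
```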
yolox/data/datasets/coco.py
CHANGED
@@ -3,6 +3,7 @@
 # Copyright (c) Megvii, Inc. and its affiliates.

 import os
+from loguru import logger

 import cv2
 import numpy as np
@@ -24,6 +25,7 @@ class COCODataset(Dataset):
         name="train2017",
         img_size=(416, 416),
         preproc=None,
+        cache=False,
     ):
         """
         COCO dataset initialization. Annotation data are read into memory by COCO API.
@@ -45,17 +47,70 @@ class COCODataset(Dataset):
         self.class_ids = sorted(self.coco.getCatIds())
         cats = self.coco.loadCats(self.coco.getCatIds())
         self._classes = tuple([c["name"] for c in cats])
-        self.annotations = self._load_coco_annotations()
+        self.imgs = None
         self.name = name
         self.img_size = img_size
         self.preproc = preproc
+        self.annotations = self._load_coco_annotations()
+        if cache:
+            self._cache_images()

     def __len__(self):
         return len(self.ids)

+    def __del__(self):
+        del self.imgs
+
     def _load_coco_annotations(self):
         return [self.load_anno_from_ids(_ids) for _ids in self.ids]

+    def _cache_images(self):
+        logger.warning(
+            "\n********************************************************************************\n"
+            "You are using cached images in RAM to accelerate training.\n"
+            "This requires large system RAM.\n"
+            "Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n"
+            "********************************************************************************\n"
+        )
+        max_h = self.img_size[0]
+        max_w = self.img_size[1]
+        cache_file = self.data_dir + "/img_resized_cache_" + self.name + ".array"
+        if not os.path.exists(cache_file):
+            logger.info(
+                "Caching images for the frist time. This might take about 20 minutes for COCO"
+            )
+            self.imgs = np.memmap(
+                cache_file,
+                shape=(len(self.ids), max_h, max_w, 3),
+                dtype=np.uint8,
+                mode="w+",
+            )
+            from tqdm import tqdm
+            from multiprocessing.pool import ThreadPool
+
+            NUM_THREADs = min(8, os.cpu_count())
+            loaded_images = ThreadPool(NUM_THREADs).imap(
+                lambda x: self.load_resized_img(x),
+                range(len(self.annotations)),
+            )
+            pbar = tqdm(enumerate(loaded_images), total=len(self.annotations))
+            for k, out in pbar:
+                self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy()
+            self.imgs.flush()
+            pbar.close()
+        else:
+            logger.warning(
+                "You are using cached imgs! Make sure your dataset is not changed!!"
+            )
+
+        logger.info("Loading cached imgs...")
+        self.imgs = np.memmap(
+            cache_file,
+            shape=(len(self.ids), max_h, max_w, 3),
+            dtype=np.uint8,
+            mode="r+",
+        )
+
     def load_anno_from_ids(self, id_):
         im_ann = self.coco.loadImgs(id_)[0]
         width = im_ann["width"]
@@ -81,32 +136,56 @@ class COCODataset(Dataset):
             res[ix, 0:4] = obj["clean_bbox"]
             res[ix, 4] = cls

+        r = min(self.img_size[0] / height, self.img_size[1] / width)
+        res[:, :4] *= r

+        img_info = (height, width)
+        resized_info = (int(height * r), int(width * r))

+        file_name = (
+            im_ann["file_name"]
+            if "file_name" in im_ann
+            else "{:012}".format(id_) + ".jpg"
+        )

-        return (res, img_info, file_name)
+        return (res, img_info, resized_info, file_name)

     def load_anno(self, index):
         return self.annotations[index][0]

+    def load_resized_img(self, index):
+        img = self.load_image(index)
+        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
+        resized_img = cv2.resize(
+            img,
+            (int(img.shape[1] * r), int(img.shape[0] * r)),
+            interpolation=cv2.INTER_LINEAR,
+        ).astype(np.uint8)
+        return resized_img
+
+    def load_image(self, index):
+        file_name = self.annotations[index][3]
+
+        img_file = os.path.join(self.data_dir, self.name, file_name)

         img = cv2.imread(img_file)
         assert img is not None

+        return img
+
+    def pull_item(self, index):
+        id_ = self.ids[index]
+
+        res, img_info, resized_info, _ = self.annotations[index]
+        if self.imgs is not None:
+            pad_img = self.imgs[index]
+            img = pad_img[: resized_info[0], : resized_info[1], :].copy()
+        else:
+            img = self.load_resized_img(index)
+
         return img, res.copy(), img_info, np.array([id_])

-    @Dataset.resize_getitem
+    @Dataset.mosaic_getitem
     def __getitem__(self, index):
         """
         One image / label pair for the given index is picked up and pre-processed.
@@ -122,10 +201,8 @@ class COCODataset(Dataset):
             class (float): class index.
             xc, yc (float) : center of bbox whose values range from 0 to 1.
             w, h (float) : size of bbox whose values range from 0 to 1.
-            info_img : tuple of h, w
+            info_img : tuple of h, w.
                 h, w (int): original shape of the image
-                nh, nw (int): shape of the resized image without padding
-                dx, dy (int): pad size
             img_id (int): same as the input index. Used for evaluation.
         """
         img, target, img_info, img_id = self.pull_item(index)
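The RAM cache added here is simply a disk-backed `np.memmap` of letterboxed uint8 images, indexed by dataset position. A stripped-down, stand-alone sketch of the same idea (file name, shapes, and the `load_resized_img` callback are illustrative, not the exact YOLOX layout):

```python
# Stand-alone sketch of the memmap caching idea used by COCODataset._cache_images.
import os
import numpy as np


def build_cache(cache_file, num_imgs, max_h, max_w, load_resized_img):
    mode = "r+" if os.path.exists(cache_file) else "w+"
    imgs = np.memmap(
        cache_file, shape=(num_imgs, max_h, max_w, 3), dtype=np.uint8, mode=mode
    )
    if mode == "w+":
        for k in range(num_imgs):
            out = load_resized_img(k)  # HWC uint8, already letterboxed
            imgs[k, : out.shape[0], : out.shape[1], :] = out
        imgs.flush()  # persist to disk once, then readers can just map it
    return imgs
```

Because the array lives on disk, forked dataloader workers (see the `start_method` change in launch.py) can all read the same pages without duplicating the cache in every process.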
yolox/data/datasets/datasets_wrapper.py
CHANGED
@@ -87,42 +87,28 @@ class Dataset(torchDataset):
         return self.__input_dim

     @staticmethod
-    def resize_getitem(getitem_fn):
+    def mosaic_getitem(getitem_fn):
         """
         Decorator method that needs to be used around the ``__getitem__`` method. |br|
-        This decorator enables the on the fly resizing of
-        the ``input_dim`` with our :class:`~lightnet.data.DataLoader` class.
+        This decorator enables the closing mosaic

         Example:
             >>> class CustomSet(ln.data.Dataset):
             ...     def __len__(self):
             ...         return 10
-            ...     @ln.data.Dataset.resize_getitem
+            ...     @ln.data.Dataset.mosaic_getitem
             ...     def __getitem__(self, index):
-            ...         return self.input_dim
-            >>> data = CustomSet((200,200))
-            >>> data[0]
-            (200, 200)
-            >>> data[(480,320), 0]
-            (480, 320)
+            ...         return self.enable_mosaic
         """

         @wraps(getitem_fn)
         def wrapper(self, index):
             if not isinstance(index, int):
-                has_dim = True
-                self._input_dim = index[0]
-                self.enable_mosaic = index[2]
+                self.enable_mosaic = index[0]
                 index = index[1]
-            else:
-                has_dim = False

             ret_val = getitem_fn(self, index)

-            if has_dim:
-                del self._input_dim
-
             return ret_val

         return wrapper
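`mosaic_getitem` lets the batch sampler smuggle the current mosaic flag into `__getitem__` through the index tuple. A toy round trip, assuming the `Dataset` base class keeps its `(input_dimension)` constructor; the `Toy` class is illustrative only:

```python
# Toy round trip for mosaic_getitem: the sampler yields (mosaic_flag, index)
# tuples and the decorator unpacks them before calling the real __getitem__.
from yolox.data.datasets.datasets_wrapper import Dataset


class Toy(Dataset):
    def __init__(self):
        super().__init__((416, 416))
        self.enable_mosaic = True

    def __len__(self):
        return 4

    @Dataset.mosaic_getitem
    def __getitem__(self, index):
        return self.enable_mosaic, index


ds = Toy()
print(ds[(False, 2)])  # -> (False, 2): the flag travels with the index
print(ds[2])           # plain int index leaves enable_mosaic untouched
```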
yolox/data/datasets/mosaicdetection.py
CHANGED
@@ -7,7 +7,7 @@ import random
 import cv2
 import numpy as np

-from yolox.utils import adjust_box_anns
+from yolox.utils import adjust_box_anns, get_local_rank

 from ..data_augment import box_candidates, random_perspective
 from .datasets_wrapper import Dataset
@@ -40,7 +40,8 @@ class MosaicDetection(Dataset):
     def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5),
-        shear=2.0, perspective=0.0, enable_mixup=True,
+        shear=2.0, perspective=0.0, enable_mixup=True,
+        mosaic_prob=1.0, mixup_prob=1.0, *args
     ):
         """

@@ -69,13 +70,16 @@ class MosaicDetection(Dataset):
         self.mixup_scale = mscale
         self.enable_mosaic = mosaic
         self.enable_mixup = enable_mixup
+        self.mosaic_prob = mosaic_prob
+        self.mixup_prob = mixup_prob
+        self.local_rank = get_local_rank()

     def __len__(self):
         return len(self._dataset)

-    @Dataset.resize_getitem
+    @Dataset.mosaic_getitem
     def __getitem__(self, idx):
-        if self.enable_mosaic:
+        if self.enable_mosaic and random.random() < self.mosaic_prob:
             mosaic_labels = []
             input_dim = self._dataset.input_dim
             input_h, input_w = input_dim[0], input_dim[1]
@@ -137,7 +141,11 @@ class MosaicDetection(Dataset):
             # -----------------------------------------------------------------
             # CopyPaste: https://arxiv.org/abs/2012.07177
             # -----------------------------------------------------------------
-            if self.enable_mixup and not len(mosaic_labels) == 0:
+            if (
+                self.enable_mixup
+                and not len(mosaic_labels) == 0
+                and random.random() < self.mixup_prob
+            ):
                 mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)
             mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)
             img_info = (mix_img.shape[1], mix_img.shape[0])
@@ -160,31 +168,35 @@ class MosaicDetection(Dataset):
         img, cp_labels, _, _ = self._dataset.pull_item(cp_index)

         if len(img.shape) == 3:
-            cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114
+            cp_img = np.ones((input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
         else:
-            cp_img = np.ones(input_dim) * 114
+            cp_img = np.ones(input_dim, dtype=np.uint8) * 114
+
         cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])
         resized_img = cv2.resize(
             img,
             (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),
             interpolation=cv2.INTER_LINEAR,
-        )
+        )
+
         cp_img[
             : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)
         ] = resized_img
+
         cp_img = cv2.resize(
             cp_img,
             (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)),
         )
         cp_scale_ratio *= jit_factor
+
         if FLIP:
             cp_img = cp_img[:, ::-1, :]

         origin_h, origin_w = cp_img.shape[:2]
         target_h, target_w = origin_img.shape[:2]
         padded_img = np.zeros(
-            (max(origin_h, target_h), max(origin_w, target_w), 3)
-        )
+            (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
+        )
         padded_img[:origin_h, :origin_w] = cp_img

         x_offset, y_offset = 0, 0
@@ -220,4 +232,4 @@ class MosaicDetection(Dataset):
         origin_img = origin_img.astype(np.float32)
         origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)

-        return origin_img, origin_labels
+        return origin_img.astype(np.uint8), origin_labels
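`mosaic_prob` and `mixup_prob` turn the two augmentations into per-sample coin flips instead of always-on switches, with mixup only ever applied on top of a mosaic sample. Conceptually (a sketch of the gating logic, not the class itself; all names here are illustrative):

```python
# Conceptual sketch of the probability gates added to MosaicDetection.
import random


def maybe_augment(sample, enable_mosaic, mosaic_prob=1.0, mixup_prob=1.0,
                  mosaic_fn=None, mixup_fn=None):
    if enable_mosaic and random.random() < mosaic_prob and mosaic_fn:
        sample = mosaic_fn(sample)
        if random.random() < mixup_prob and mixup_fn:
            sample = mixup_fn(sample)  # mixup only runs on a mosaic sample
    return sample
```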
yolox/data/datasets/voc.py
CHANGED
@@ -10,6 +10,7 @@ import os
 import os.path
 import pickle
 import xml.etree.ElementTree as ET
+from loguru import logger

 import cv2
 import numpy as np
@@ -35,7 +36,9 @@ class AnnotationTransform(object):
     """

     def __init__(self, class_to_ind=None, keep_difficult=True):
-        self.class_to_ind = class_to_ind or dict(
+        self.class_to_ind = class_to_ind or dict(
+            zip(VOC_CLASSES, range(len(VOC_CLASSES)))
+        )
         self.keep_difficult = keep_difficult

     def __call__(self, target):
@@ -48,7 +51,11 @@ class AnnotationTransform(object):
         """
         res = np.empty((0, 5))
         for obj in target.iter("object"):
-            difficult = int(obj.find("difficult").text) == 1
+            difficult = obj.find("difficult")
+            if difficult is not None:
+                difficult = int(difficult.text) == 1
+            else:
+                difficult = False
             if not self.keep_difficult and difficult:
                 continue
             name = obj.find("name").text.strip()
@@ -66,7 +73,11 @@ class AnnotationTransform(object):
             res = np.vstack((res, bndbox))  # [xmin, ymin, xmax, ymax, label_ind]
             # img_id = target.find('filename').text[:-4]

-        return res
+        width = int(target.find("size").find("width").text)
+        height = int(target.find("size").find("height").text)
+        img_info = (height, width)
+
+        return res, img_info


 class VOCDetection(Dataset):
@@ -91,11 +102,12 @@ class VOCDetection(Dataset):
     def __init__(
         self,
         data_dir,
-        image_sets=[(
+        image_sets=[("2007", "trainval"), ("2012", "trainval")],
         img_size=(416, 416),
         preproc=None,
         target_transform=AnnotationTransform(),
         dataset_name="VOC0712",
+        cache=False,
     ):
         super().__init__(img_size)
         self.root = data_dir
@@ -116,16 +128,98 @@ class VOCDetection(Dataset):
         ):
             self.ids.append((rootpath, line.strip()))

+        self.annotations = self._load_coco_annotations()
+        self.imgs = None
+        if cache:
+            self._cache_images()
+
     def __len__(self):
         return len(self.ids)

-    def load_anno(self, index):
+    def _load_coco_annotations(self):
+        return [self.load_anno_from_ids(_ids) for _ids in range(len(self.ids))]
+
+    def _cache_images(self):
+        logger.warning(
+            "\n********************************************************************************\n"
+            "You are using cached images in RAM to accelerate training.\n"
+            "This requires large system RAM.\n"
+            "Make sure you have 60G+ RAM and 19G available disk space for training VOC.\n"
+            "********************************************************************************\n"
+        )
+        max_h = self.img_size[0]
+        max_w = self.img_size[1]
+        cache_file = self.root + "/img_resized_cache_" + self.name + ".array"
+        if not os.path.exists(cache_file):
+            logger.info(
+                "Caching images for the frist time. This might take about 3 minutes for VOC"
+            )
+            self.imgs = np.memmap(
+                cache_file,
+                shape=(len(self.ids), max_h, max_w, 3),
+                dtype=np.uint8,
+                mode="w+",
+            )
+            from tqdm import tqdm
+            from multiprocessing.pool import ThreadPool
+
+            NUM_THREADs = min(8, os.cpu_count())
+            loaded_images = ThreadPool(NUM_THREADs).imap(
+                lambda x: self.load_resized_img(x),
+                range(len(self.annotations)),
+            )
+            pbar = tqdm(enumerate(loaded_images), total=len(self.annotations))
+            for k, out in pbar:
+                self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy()
+            self.imgs.flush()
+            pbar.close()
+        else:
+            logger.warning(
+                "You are using cached imgs! Make sure your dataset is not changed!!"
+            )
+
+        logger.info("Loading cached imgs...")
+        self.imgs = np.memmap(
+            cache_file,
+            shape=(len(self.ids), max_h, max_w, 3),
+            dtype=np.uint8,
+            mode="r+",
+        )
+
+    def load_anno_from_ids(self, index):
         img_id = self.ids[index]
         target = ET.parse(self._annopath % img_id).getroot()
-        if self.target_transform is not None:
-            target = self.target_transform(target)

-        return target
+        assert self.target_transform is not None
+        res, img_info = self.target_transform(target)
+        height, width = img_info
+
+        r = min(self.img_size[0] / height, self.img_size[1] / width)
+        res[:, :4] *= r
+        resized_info = (int(height * r), int(width * r))
+
+        return (res, img_info, resized_info)
+
+    def load_anno(self, index):
+        return self.annotations[index][0]
+
+    def load_resized_img(self, index):
+        img = self.load_image(index)
+        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])
+        resized_img = cv2.resize(
+            img,
+            (int(img.shape[1] * r), int(img.shape[0] * r)),
+            interpolation=cv2.INTER_LINEAR,
+        ).astype(np.uint8)
+
+        return resized_img
+
+    def load_image(self, index):
+        img_id = self.ids[index]
+        img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+        assert img is not None
+
+        return img

     def pull_item(self, index):
         """Returns the original image and target at an index for mixup
@@ -138,17 +232,17 @@ class VOCDetection(Dataset):
         Return:
             img, target
         """
+        if self.imgs is not None:
+            target, img_info, resized_info = self.annotations[index]
+            pad_img = self.imgs[index]
+            img = pad_img[: resized_info[0], : resized_info[1], :].copy()
+        else:
+            img = self.load_resized_img(index)
+            target, img_info, _ = self.annotations[index]

         return img, target, img_info, index

-    @Dataset.resize_getitem
+    @Dataset.mosaic_getitem
     def __getitem__(self, index):
         img, target, img_info, img_id = self.pull_item(index)

@@ -167,7 +261,9 @@ class VOCDetection(Dataset):
         all_boxes[class][image] = [] or np.array of shape #dets x 5
         """
         self._write_voc_results_file(all_boxes)
-        IouTh = np.linspace(
+        IouTh = np.linspace(
+            0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True
+        )
         mAPs = []
         for iou in IouTh:
             mAP = self._do_python_eval(output_dir, iou)
yolox/data/samplers.py
CHANGED
@@ -13,28 +13,18 @@ from torch.utils.data.sampler import Sampler

 class YoloBatchSampler(torchBatchSampler):
     """
-    This batch sampler will generate mini-batches of (input_dim, index, mosaic) tuples from another sampler.
+    This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler.
     It works just like the :class:`torch.utils.data.sampler.BatchSampler`,
-    but it will
+    but it will turn on/off the mosaic aug.
     """

-    def __init__(self, *args, input_dimension=None, mosaic=True, **kwargs):
+    def __init__(self, *args, mosaic=True, **kwargs):
         super().__init__(*args, **kwargs)
-        self.input_dim = input_dimension
-        self.new_input_dim = None
         self.mosaic = mosaic

     def __iter__(self):
-        self.__set_input_dim()
         for batch in super().__iter__():
-            yield [(self.input_dim, idx, self.mosaic) for idx in batch]
-            self.__set_input_dim()
-
-    def __set_input_dim(self):
-        """ This function randomly changes the the input dimension of the dataset. """
-        if self.new_input_dim is not None:
-            self.input_dim = (self.new_input_dim[0], self.new_input_dim[1])
-            self.new_input_dim = None
+            yield [(self.mosaic, idx) for idx in batch]


 class InfiniteSampler(Sampler):
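Multi-scale resizing has moved out of the sampler (it now lives in `Exp.preprocess`), so `YoloBatchSampler` only forwards a boolean mosaic flag with every index. A small demonstration, assuming the class is used on its own:

```python
# YoloBatchSampler now yields (mosaic, index) pairs instead of
# (input_dim, index, mosaic) triples.
from torch.utils.data.sampler import SequentialSampler
from yolox.data import YoloBatchSampler

sampler = YoloBatchSampler(
    sampler=SequentialSampler(range(6)), batch_size=3, drop_last=False, mosaic=True
)
for batch in sampler:
    print(batch)  # e.g. [(True, 0), (True, 1), (True, 2)]
```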
yolox/exp/yolox_base.py
CHANGED
@@ -13,7 +13,6 @@ from .base_exp import BaseExp


 class Exp(BaseExp):
-
     def __init__(self):
         super().__init__()

@@ -32,6 +31,8 @@ class Exp(BaseExp):
         self.val_ann = "instances_val2017.json"

         # --------------- transform config ----------------- #
+        self.mosaic_prob = 1.0
+        self.mixup_prob = 1.0
         self.degrees = 10.0
         self.translate = 0.1
         self.scale = (0.1, 2)
@@ -80,7 +81,9 @@ class Exp(BaseExp):
         self.model.head.initialize_biases(1e-2)
         return self.model

-    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
+    def get_data_loader(
+        self, batch_size, is_distributed, no_aug=False, cache_img=False
+    ):
         from yolox.data import (
             COCODataset,
             TrainTransform,
@@ -88,34 +91,37 @@ class Exp(BaseExp):
             DataLoader,
             InfiniteSampler,
             MosaicDetection,
+            worker_init_reset_seed,
         )
+        from yolox.utils import (
+            wait_for_the_master,
+            get_local_rank,
+        )

-        dataset = COCODataset(
-            data_dir=self.data_dir,
-            json_file=self.train_ann,
-            img_size=self.input_size,
-            preproc=TrainTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-                max_labels=50,
-            ),
-        )
+        local_rank = get_local_rank()
+
+        with wait_for_the_master(local_rank):
+            dataset = COCODataset(
+                data_dir=self.data_dir,
+                json_file=self.train_ann,
+                img_size=self.input_size,
+                preproc=TrainTransform(max_labels=50),
+                cache=cache_img,
+            )

         dataset = MosaicDetection(
             dataset,
             mosaic=not no_aug,
             img_size=self.input_size,
-            preproc=TrainTransform(
-                rgb_means=(0.485, 0.456, 0.406),
-                std=(0.229, 0.224, 0.225),
-                max_labels=120,
-            ),
+            preproc=TrainTransform(max_labels=120),
             degrees=self.degrees,
             translate=self.translate,
             scale=self.scale,
             shear=self.shear,
             perspective=self.perspective,
             enable_mixup=self.enable_mixup,
+            mosaic_prob=self.mosaic_prob,
+            mixup_prob=self.mixup_prob,
         )

         self.dataset = dataset
@@ -123,20 +129,22 @@ class Exp(BaseExp):
         if is_distributed:
             batch_size = batch_size // dist.get_world_size()

-        sampler = InfiniteSampler(
-            len(self.dataset), seed=self.seed if self.seed else 0
-        )
+        sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)

         batch_sampler = YoloBatchSampler(
             sampler=sampler,
             batch_size=batch_size,
             drop_last=False,
-            input_dimension=self.input_size,
             mosaic=not no_aug,
         )

         dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
         dataloader_kwargs["batch_sampler"] = batch_sampler
+
+        # Make sure each process has different random seed, especially for 'fork' method.
+        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
+        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
+
         train_loader = DataLoader(self.dataset, **dataloader_kwargs)

         return train_loader
@@ -145,7 +153,7 @@ class Exp(BaseExp):
         tensor = torch.LongTensor(2).cuda()

         if rank == 0:
-            size_factor = self.input_size[1] * 1. / self.input_size[0]
+            size_factor = self.input_size[1] * 1.0 / self.input_size[0]
             size = random.randint(*self.random_size)
             size = (int(32 * size), 32 * int(size * size_factor))
             tensor[0] = size[0]
@@ -155,11 +163,18 @@ class Exp(BaseExp):
             dist.barrier()
         dist.broadcast(tensor, 0)

-        input_size = data_loader.change_input_dim(
-            multiple=(tensor[0].item(), tensor[1].item()), random_range=None
-        )
+        input_size = (tensor[0].item(), tensor[1].item())
         return input_size

+    def preprocess(self, inputs, targets, tsize):
+        scale = tsize[0] / self.input_size[0]
+        if scale != 1:
+            inputs = nn.functional.interpolate(
+                inputs, size=tsize, mode="bilinear", align_corners=False
+            )
+            targets[..., 1:] = targets[..., 1:] * scale
+        return inputs, targets
+
     def get_optimizer(self, batch_size):
         if "optimizer" not in self.__dict__:
             if self.warmup_epochs > 0:
@@ -190,6 +205,7 @@ class Exp(BaseExp):

     def get_lr_scheduler(self, lr, iters_per_epoch):
         from yolox.utils import LRScheduler
+
         scheduler = LRScheduler(
             self.scheduler,
             lr,
@@ -202,7 +218,7 @@ class Exp(BaseExp):
         )
         return scheduler

-    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
+    def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
         from yolox.data import COCODataset, ValTransform

         valdataset = COCODataset(
@@ -210,9 +226,7 @@ class Exp(BaseExp):
             json_file=self.val_ann if not testdev else "image_info_test-dev2017.json",
             name="val2017" if not testdev else "test2017",
             img_size=self.test_size,
-            preproc=ValTransform(
-                rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
-            ),
+            preproc=ValTransform(legacy=legacy),
         )

         if is_distributed:
@@ -233,10 +247,10 @@ class Exp(BaseExp):

         return val_loader

-    def get_evaluator(self, batch_size, is_distributed, testdev=False):
+    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
         from yolox.evaluators import COCOEvaluator

-        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev)
+        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
         evaluator = COCOEvaluator(
             dataloader=val_loader,
             img_size=self.test_size,
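Multi-scale training now happens after batching: `Exp.preprocess` interpolates the already-collated tensor and rescales the `(cls, cx, cy, w, h)` targets, instead of re-letterboxing each image in the dataloader. A small numeric sketch of that step (tensor shapes are illustrative):

```python
# Sketch of what Exp.preprocess does for multiscale training: resize the batch
# at tensor level and let the box targets follow the same scale factor.
import torch
from torch import nn

inputs = torch.rand(2, 3, 640, 640)  # batch at the base input_size
targets = torch.tensor([[[0.0, 320.0, 320.0, 100.0, 80.0]]]).repeat(2, 1, 1)

tsize, base = (480, 480), 640
scale = tsize[0] / base  # 0.75
inputs = nn.functional.interpolate(
    inputs, size=tsize, mode="bilinear", align_corners=False
)
targets[..., 1:] = targets[..., 1:] * scale  # boxes follow the resize
print(inputs.shape, targets[0, 0])  # torch.Size([2, 3, 480, 480]) ...
```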
yolox/models/yolo_head.py
CHANGED
@@ -486,13 +486,14 @@ class YOLOXHead(nn.Module):
         if mode == "cpu":
             cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()

-        cls_preds_ = (
-            cls_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
-            * obj_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
-        )
-        pair_wise_cls_loss = F.binary_cross_entropy(
-            cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
-        ).sum(-1)
+        with torch.cuda.amp.autocast(enabled=False):
+            cls_preds_ = (
+                cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
+                * obj_preds_.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
+            )
+            pair_wise_cls_loss = F.binary_cross_entropy(
+                cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
+            ).sum(-1)
         del cls_preds_

         cost = (
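The SimOTA cost computation opts out of autocast because the product of half-precision sigmoids fed into BCE can under- or overflow. The pattern, sketched in isolation (function and argument names are illustrative, shapes must already be broadcast-compatible):

```python
# Pattern used above: keep a numerically sensitive block in fp32 even when the
# surrounding forward pass runs under torch.cuda.amp.autocast.
import torch
import torch.nn.functional as F


def pairwise_cls_cost(cls_preds, obj_preds, gt_cls, num_gt):
    with torch.cuda.amp.autocast(enabled=False):
        joint = (
            cls_preds.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid()
            * obj_preds.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid()
        )
        return F.binary_cross_entropy(joint.sqrt(), gt_cls, reduction="none").sum(-1)
```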
yolox/utils/dist.py
CHANGED
@@ -10,9 +10,11 @@ This is useful when doing distributed training.
 """

 import functools
-import logging
+import os
 import pickle
 import time
+from contextlib import contextmanager
+from loguru import logger

 import numpy as np

@@ -20,6 +22,8 @@ import torch
 from torch import distributed as dist

 __all__ = [
+    "get_num_devices",
+    "wait_for_the_master",
     "is_main_process",
     "synchronize",
     "get_world_size",
@@ -34,6 +38,33 @@ __all__ = [
 _LOCAL_PROCESS_GROUP = None


+def get_num_devices():
+    gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)
+    if gpu_list is not None:
+        return len(gpu_list.split(','))
+    else:
+        devices_list_info = os.popen("nvidia-smi -L")
+        devices_list_info = devices_list_info.read().strip().split("\n")
+        return len(devices_list_info)
+
+
+@contextmanager
+def wait_for_the_master(local_rank: int):
+    """
+    Make all processes waiting for the master to do some task.
+    """
+    if local_rank > 0:
+        dist.barrier()
+    yield
+    if local_rank == 0:
+        if not dist.is_available():
+            return
+        if not dist.is_initialized():
+            return
+        else:
+            dist.barrier()
+
+
 def synchronize():
     """
     Helper function to synchronize (barrier) among all processes when using distributed training
@@ -112,7 +143,6 @@ def _serialize_to_tensor(data, group):

     buffer = pickle.dumps(data)
     if len(buffer) > 1024 ** 3:
-        logger = logging.getLogger(__name__)
         logger.warning(
             "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                 get_rank(), len(buffer) / (1024 ** 3), device
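`wait_for_the_master` is what lets only the local rank-0 process build the image cache (or do any other one-off setup) while the remaining ranks block on a barrier and then proceed. Typical usage, sketched with a placeholder `build_dataset` helper (the real call sites pass `COCODataset(..., cache=True)` as in `yolox_base.py` above):

```python
# Typical use of wait_for_the_master: rank 0 does the one-off work first,
# the other ranks wait, then everyone continues together.
from yolox.utils import get_local_rank, wait_for_the_master

local_rank = get_local_rank()
with wait_for_the_master(local_rank):
    # rank 0 enters immediately and may write img_resized_cache_*.array;
    # other ranks wait at a barrier until rank 0 leaves the block.
    dataset = build_dataset(cache=True)  # placeholder for COCODataset(..., cache=True)
```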