Kaelan committed on
Commit
01e1043
·
1 Parent(s): f2ae3aa
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .env +1 -0
  2. deep_sort_torch/.gitignore +13 -0
  3. deep_sort_torch/.gitmodules +6 -0
  4. deep_sort_torch/LICENSE +21 -0
  5. deep_sort_torch/README.md +170 -0
  6. deep_sort_torch/configs/deep_sort.yaml +10 -0
  7. deep_sort_torch/configs/fastreid.yaml +3 -0
  8. deep_sort_torch/configs/mmdet.yaml +5 -0
  9. deep_sort_torch/configs/yolov3.yaml +7 -0
  10. deep_sort_torch/configs/yolov3_tiny.yaml +7 -0
  11. deep_sort_torch/deep_sort/README.md +3 -0
  12. deep_sort_torch/deep_sort/__init__.py +28 -0
  13. deep_sort_torch/deep_sort/deep/__init__.py +0 -0
  14. deep_sort_torch/deep_sort/deep/checkpoint/.gitkeep +0 -0
  15. deep_sort_torch/deep_sort/deep/evaluate.py +15 -0
  16. deep_sort_torch/deep_sort/deep/feature_extractor.py +95 -0
  17. deep_sort_torch/deep_sort/deep/model.py +104 -0
  18. deep_sort_torch/deep_sort/deep/original_model.py +106 -0
  19. deep_sort_torch/deep_sort/deep/test.py +77 -0
  20. deep_sort_torch/deep_sort/deep/train.jpg +0 -0
  21. deep_sort_torch/deep_sort/deep/train.py +189 -0
  22. deep_sort_torch/deep_sort/deep_sort.py +117 -0
  23. deep_sort_torch/deep_sort/sort/__init__.py +0 -0
  24. deep_sort_torch/deep_sort/sort/detection.py +49 -0
  25. deep_sort_torch/deep_sort/sort/iou_matching.py +81 -0
  26. deep_sort_torch/deep_sort/sort/kalman_filter.py +229 -0
  27. deep_sort_torch/deep_sort/sort/linear_assignment.py +192 -0
  28. deep_sort_torch/deep_sort/sort/nn_matching.py +177 -0
  29. deep_sort_torch/deep_sort/sort/preprocessing.py +73 -0
  30. deep_sort_torch/deep_sort/sort/track.py +166 -0
  31. deep_sort_torch/deep_sort/sort/tracker.py +138 -0
  32. deep_sort_torch/deepsort.py +172 -0
  33. deep_sort_torch/detector/MMDet/__init__.py +2 -0
  34. deep_sort_torch/detector/MMDet/detector.py +55 -0
  35. deep_sort_torch/detector/MMDet/mmdet_utils.py +15 -0
  36. deep_sort_torch/detector/YOLOv3/README.md +11 -0
  37. deep_sort_torch/detector/YOLOv3/__init__.py +9 -0
  38. deep_sort_torch/detector/YOLOv3/cfg.py +248 -0
  39. deep_sort_torch/detector/YOLOv3/cfg/coco.data +5 -0
  40. deep_sort_torch/detector/YOLOv3/cfg/coco.names +80 -0
  41. deep_sort_torch/detector/YOLOv3/cfg/darknet19_448.cfg +200 -0
  42. deep_sort_torch/detector/YOLOv3/cfg/tiny-yolo-voc.cfg +134 -0
  43. deep_sort_torch/detector/YOLOv3/cfg/tiny-yolo.cfg +140 -0
  44. deep_sort_torch/detector/YOLOv3/cfg/voc.data +5 -0
  45. deep_sort_torch/detector/YOLOv3/cfg/voc.names +20 -0
  46. deep_sort_torch/detector/YOLOv3/cfg/voc_gaotie.data +5 -0
  47. deep_sort_torch/detector/YOLOv3/cfg/yolo-voc.cfg +258 -0
  48. deep_sort_torch/detector/YOLOv3/cfg/yolo.cfg +258 -0
  49. deep_sort_torch/detector/YOLOv3/cfg/yolo_v3.cfg +789 -0
  50. deep_sort_torch/detector/YOLOv3/cfg/yolov3-tiny.cfg +182 -0
.env ADDED
@@ -0,0 +1 @@
+ secret_key='super secret key'
deep_sort_torch/.gitignore ADDED
@@ -0,0 +1,13 @@
+ # Folders
+ __pycache__/
+ build/
+ *.egg-info
+
+
+ # Files
+ *.weights
+ *.t7
+ *.mp4
+ *.avi
+ *.so
+ *.txt
deep_sort_torch/.gitmodules ADDED
@@ -0,0 +1,6 @@
+ [submodule "thirdparty/fast-reid"]
+ path = thirdparty/fast-reid
+ url = https://github.com/JDAI-CV/fast-reid.git
+ [submodule "thirdparty/mmdetection"]
+ path = thirdparty/mmdetection
+ url = https://github.com/open-mmlab/mmdetection.git
deep_sort_torch/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2020 Ziqiang
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
deep_sort_torch/README.md ADDED
@@ -0,0 +1,170 @@
+ # Deep Sort with PyTorch
+
+ ![](demo/demo.gif)
+
+ ## Update (1-1-2020)
+ Changes
+ - fix bugs
+ - refactor code
+ - accelerate detection by adding NMS on GPU
+
+ ## Latest Update (07-22)
+ Changes
+ - bug fixes (thanks @JieChen91 and @yingsen1 for reporting them).
+ - batched feature extraction for each frame, which leads to a small speed-up.
+ - code improvements.
+
+ Further improvement directions
+ - Train the detector on a specific dataset rather than the official one.
+ - Retrain the RE-ID model on a pedestrian dataset for better performance.
+ - Replace the YOLOv3 detector with more advanced ones.
+
+ **Any contributions to this repository are welcome!**
+
+
+ ## Introduction
+ This is an implementation of the MOT tracking algorithm Deep SORT. Deep SORT is basically the same as SORT, but it adds a CNN model to extract appearance features from the image region of each person found by a detector. This CNN model is in fact a RE-ID model; the detector used in the [PAPER](https://arxiv.org/abs/1703.07402) is Faster R-CNN, and the original source code is [HERE](https://github.com/nwojke/deep_sort).
+ However, in the original code the CNN model is implemented with TensorFlow, which I'm not familiar with, so I re-implemented the CNN feature extraction model with PyTorch and changed the CNN model a little. Also, I use **YOLOv3** to generate bboxes instead of Faster R-CNN.
+
+ ## Dependencies
+ - python 3 (python 2 untested)
+ - numpy
+ - scipy
+ - opencv-python
+ - sklearn
+ - torch >= 0.4
+ - torchvision >= 0.1
+ - pillow
+ - vizer
+ - edict
+
+ ## Quick Start
+ 0. Check that all dependencies are installed
+ ```bash
+ pip install -r requirements.txt
+ ```
+ For users in China, you can specify a PyPI mirror to speed up installation, for example:
+ ```bash
+ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+ ```
+
+ 1. Clone this repository
+ ```
+ git clone [email protected]:ZQPei/deep_sort_pytorch.git
+ ```
+
+ 2. Download the YOLOv3 parameters
+ ```
+ cd detector/YOLOv3/weight/
+ wget https://pjreddie.com/media/files/yolov3.weights
+ wget https://pjreddie.com/media/files/yolov3-tiny.weights
+ cd ../../../
+ ```
+
+ 3. Download the deepsort parameters ckpt.t7
+ ```
+ cd deep_sort/deep/checkpoint
+ # download ckpt.t7 from
+ # https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder
+ cd ../../../
+ ```
+
+ 4. Compile the nms module
+ ```bash
+ cd detector/YOLOv3/nms
+ sh build.sh
+ cd ../../..
+ ```
+
+ Notice:
+ If compilation fails, the simplest fix is to **upgrade your PyTorch to >= 1.1 and torchvision to >= 0.3**; this avoids the troublesome compilation problems, which are most likely caused by either `gcc version too low` or `libraries missing`.
+
+ 5. (Optional) Prepare the third-party submodules
+
+ [fast-reid](https://github.com/JDAI-CV/fast-reid)
+
+ This library supports bagtricks, AGW and other mainstream ReID methods through a fast-reid adapter.
+
+ Run the submodule command below to prepare the bundled fast-reid, then follow the instructions in its README to install it.
+
+ Please refer to `configs/fastreid.yaml` for a sample configuration using fast-reid. See the [Model Zoo](https://github.com/JDAI-CV/fast-reid/blob/master/docs/MODEL_ZOO.md) for available methods and trained models.
+
+ [MMDetection](https://github.com/open-mmlab/mmdetection)
+
+ This library supports Faster R-CNN and other mainstream detection methods through an MMDetection adapter.
+
+ Run the submodule command below to prepare the bundled MMDetection, then follow the instructions in its README to install it.
+
+ Please refer to `configs/mmdet.yaml` for a sample configuration using MMDetection. See the [Model Zoo](https://github.com/open-mmlab/mmdetection/blob/master/docs/model_zoo.md) for available methods and trained models.
+
+ Run
+
+ ```
+ git submodule update --init --recursive
+ ```
+
+
+ 6. Run the demo
+ ```
+ usage: deepsort.py [-h]
+                    [--fastreid]
+                    [--config_fastreid CONFIG_FASTREID]
+                    [--mmdet]
+                    [--config_mmdetection CONFIG_MMDETECTION]
+                    [--config_detection CONFIG_DETECTION]
+                    [--config_deepsort CONFIG_DEEPSORT] [--display]
+                    [--frame_interval FRAME_INTERVAL]
+                    [--display_width DISPLAY_WIDTH]
+                    [--display_height DISPLAY_HEIGHT] [--save_path SAVE_PATH]
+                    [--cpu] [--camera CAM]
+                    VIDEO_PATH
+
+ # yolov3 + deepsort
+ python deepsort.py [VIDEO_PATH]
+
+ # yolov3_tiny + deepsort
+ python deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml
+
+ # yolov3 + deepsort on webcam
+ python3 deepsort.py /dev/video0 --camera 0
+
+ # yolov3_tiny + deepsort on webcam
+ python3 deepsort.py /dev/video0 --config_detection ./configs/yolov3_tiny.yaml --camera 0
+
+ # fast-reid + deepsort
+ python deepsort.py [VIDEO_PATH] --fastreid [--config_fastreid ./configs/fastreid.yaml]
+
+ # MMDetection + deepsort
+ python deepsort.py [VIDEO_PATH] --mmdet [--config_mmdetection ./configs/mmdet.yaml]
+ ```
+ Use `--display` to enable the display.
+ Results will be saved to `./output/results.avi` and `./output/results.txt`.
+
+ All files above can also be accessed from BaiduDisk:
+ link: [BaiduDisk](https://pan.baidu.com/s/1YJ1iPpdFTlUyLFoonYvozg)
+ password: fbuw
+
+ ## Training the RE-ID model
+ The original model used in the paper is in original_model.py, and its parameters are here: [original_ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6).
+
+ To train the model, first download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or the [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
+
+ Then you can use [train.py](deep_sort/deep/train.py) to train your own parameters and evaluate them with [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evaluate.py).
+ ![train.jpg](deep_sort/deep/train.jpg)
+
+ ## Demo videos and images
+ [demo.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
+ [demo2.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
+
+ ![1.jpg](demo/1.jpg)
+ ![2.jpg](demo/2.jpg)
+
+
+ ## References
+ - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402)
+
+ - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort)
+
+ - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf)
+
+ - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/)
deep_sort_torch/configs/deep_sort.yaml ADDED
@@ -0,0 +1,10 @@
+ DEEPSORT:
+ REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7"
+ MAX_DIST: 0.2
+ MIN_CONFIDENCE: 0.3
+ NMS_MAX_OVERLAP: 0.5
+ MAX_IOU_DISTANCE: 0.7
+ MAX_AGE: 70
+ N_INIT: 3
+ NN_BUDGET: 100
+
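The keys above map one-to-one onto the `DeepSort` constructor arguments used by `build_tracker` (see `deep_sort/__init__.py` further down in this diff). A minimal loading sketch, assuming PyYAML and easydict (the `edict` dependency listed in the README); the actual config handling lives in `deepsort.py` and may differ:

```python
# Hypothetical loading sketch, not part of this commit.
import yaml
from easydict import EasyDict

with open("configs/deep_sort.yaml") as f:
    cfg = EasyDict(yaml.safe_load(f))

cfg.USE_FASTREID = False              # flag checked by build_tracker()
print(cfg.DEEPSORT.MAX_DIST)          # 0.2 -> max cosine distance for appearance matching
print(cfg.DEEPSORT.REID_CKPT)         # ./deep_sort/deep/checkpoint/ckpt.t7
```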
deep_sort_torch/configs/fastreid.yaml ADDED
@@ -0,0 +1,3 @@
+ FASTREID:
+ CFG: "thirdparty/fast-reid/configs/Market1501/bagtricks_R50.yml"
+ CHECKPOINT: "deep_sort/deep/checkpoint/market_bot_R50.pth"
deep_sort_torch/configs/mmdet.yaml ADDED
@@ -0,0 +1,5 @@
+ MMDET:
+ CFG: "thirdparty/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py"
+ CHECKPOINT: "detector/MMDet/weight/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
+
+ SCORE_THRESH: 0.5
deep_sort_torch/configs/yolov3.yaml ADDED
@@ -0,0 +1,7 @@
+ YOLOV3:
+ CFG: "./detector/YOLOv3/cfg/yolo_v3.cfg"
+ WEIGHT: "./detector/YOLOv3/weight/yolov3.weights"
+ CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+ SCORE_THRESH: 0.5
+ NMS_THRESH: 0.4
deep_sort_torch/configs/yolov3_tiny.yaml ADDED
@@ -0,0 +1,7 @@
+ YOLOV3:
+ CFG: "./detector/YOLOv3/cfg/yolov3-tiny.cfg"
+ WEIGHT: "./detector/YOLOv3/weight/yolov3-tiny.weights"
+ CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names"
+
+ SCORE_THRESH: 0.5
+ NMS_THRESH: 0.4
deep_sort_torch/deep_sort/README.md ADDED
@@ -0,0 +1,3 @@
+ # Deep Sort
+
+ This is the implementation of Deep SORT with PyTorch.
deep_sort_torch/deep_sort/__init__.py ADDED
@@ -0,0 +1,28 @@
+ from .deep_sort import DeepSort
+
+
+ __all__ = ['DeepSort', 'build_tracker']
+
+
+ def build_tracker(cfg, use_cuda):
+ if cfg.USE_FASTREID:
+ return DeepSort(model_path=cfg.FASTREID.CHECKPOINT, model_config=cfg.FASTREID.CFG,
+ max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
+ nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
+ max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
+
+ else:
+ return DeepSort(model_path=cfg.DEEPSORT.REID_CKPT,
+ max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
+ nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
+ max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
+
+
+
+
+
+
+
+
+
+
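For orientation, a hedged sketch of how `build_tracker` is typically wired into a video loop; `detect()` and `cfg` are placeholders (the real entry point is `deepsort.py`, and `DeepSort.update` is defined in `deep_sort/deep_sort.py` below):

```python
# Hypothetical usage sketch, not part of this commit.
import cv2
from deep_sort import build_tracker   # assumes the deep_sort package is importable

tracker = build_tracker(cfg, use_cuda=True)   # cfg as in the YAML-loading sketch above

cap = cv2.VideoCapture("demo.avi")
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # detect() is a placeholder: it should return center-format boxes (xc, yc, w, h)
    # and per-box confidences for the current frame.
    bbox_xywh, confidences = detect(frame)
    outputs = tracker.update(bbox_xywh, confidences, frame)   # rows: x1, y1, x2, y2, track_id
    for x1, y1, x2, y2, track_id in outputs:
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cap.release()
```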
deep_sort_torch/deep_sort/deep/__init__.py ADDED
File without changes
deep_sort_torch/deep_sort/deep/checkpoint/.gitkeep ADDED
File without changes
deep_sort_torch/deep_sort/deep/evaluate.py ADDED
@@ -0,0 +1,15 @@
+ import torch
+
+ features = torch.load("features.pth")
+ qf = features["qf"]
+ ql = features["ql"]
+ gf = features["gf"]
+ gl = features["gl"]
+
+ scores = qf.mm(gf.t())
+ res = scores.topk(5, dim=1)[1][:,0]
+ top1correct = gl[res].eq(ql).sum().item()
+
+ print("Acc top1:{:.3f}".format(top1correct/ql.size(0)))
+
+
deep_sort_torch/deep_sort/deep/feature_extractor.py ADDED
@@ -0,0 +1,95 @@
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ import numpy as np
4
+ import cv2
5
+ import logging
6
+
7
+ from .model import Net
8
+ # NOTE: FastReIDExtractor below needs get_cfg, DefaultTrainer and Checkpointer
+ # from the fast-reid submodule; enable these imports once fast-reid is installed.
+ # from .fastreid.config import get_cfg
+ # from .fastreid.engine import DefaultTrainer
+ # from .fastreid.utils.checkpoint import Checkpointer
11
+
12
+ class Extractor(object):
13
+ def __init__(self, model_path, use_cuda=True):
14
+ self.net = Net(reid=True)
15
+ self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
16
+ state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict']
17
+ self.net.load_state_dict(state_dict)
18
+ logger = logging.getLogger("root.tracker")
19
+ logger.info("Loading weights from {}... Done!".format(model_path))
20
+ self.net.to(self.device)
21
+ self.size = (64, 128)
22
+ self.norm = transforms.Compose([
23
+ transforms.ToTensor(),
24
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
25
+ ])
26
+
27
+
28
+
29
+ def _preprocess(self, im_crops):
30
+ """
31
+ TODO:
32
+ 1. to float with scale from 0 to 1
33
+ 2. resize to (64, 128) as Market1501 dataset did
34
+ 3. concatenate to a numpy array
35
+ 3. to torch Tensor
36
+ 4. normalize
37
+ """
38
+ def _resize(im, size):
39
+ return cv2.resize(im.astype(np.float32)/255., size)
40
+
41
+ im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
42
+ return im_batch
43
+
44
+
45
+ def __call__(self, im_crops):
46
+ im_batch = self._preprocess(im_crops)
47
+ with torch.no_grad():
48
+ im_batch = im_batch.to(self.device)
49
+ features = self.net(im_batch)
50
+ return features.cpu().numpy()
51
+
52
+ class FastReIDExtractor(object):
53
+ def __init__(self, model_config, model_path, use_cuda=True):
54
+ cfg = get_cfg()
55
+ cfg.merge_from_file(model_config)
56
+ cfg.MODEL.BACKBONE.PRETRAIN = False
57
+ self.net = DefaultTrainer.build_model(cfg)
58
+ self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
59
+
60
+ Checkpointer(self.net).load(model_path)
61
+ logger = logging.getLogger("root.tracker")
62
+ logger.info("Loading weights from {}... Done!".format(model_path))
63
+ self.net.to(self.device)
64
+ self.net.eval()
65
+ height, width = cfg.INPUT.SIZE_TEST
66
+ self.size = (width, height)
67
+ self.norm = transforms.Compose([
68
+ transforms.ToTensor(),
69
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
70
+ ])
71
+
72
+
73
+ def _preprocess(self, im_crops):
74
+ def _resize(im, size):
75
+ return cv2.resize(im.astype(np.float32)/255., size)
76
+
77
+ im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
78
+ return im_batch
79
+
80
+
81
+ def __call__(self, im_crops):
82
+ im_batch = self._preprocess(im_crops)
83
+ with torch.no_grad():
84
+ im_batch = im_batch.to(self.device)
85
+ features = self.net(im_batch)
86
+ return features.cpu().numpy()
87
+
88
+
89
+
90
+ if __name__ == '__main__':
91
+ img = cv2.imread("demo.jpg")[:,:,(2,1,0)]
92
+ extr = Extractor("checkpoint/ckpt.t7")
93
+ feature = extr([img])  # the extractor expects a list of image crops
94
+ print(feature.shape)
95
+
deep_sort_torch/deep_sort/deep/model.py ADDED
@@ -0,0 +1,104 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ class BasicBlock(nn.Module):
6
+ def __init__(self, c_in, c_out,is_downsample=False):
7
+ super(BasicBlock,self).__init__()
8
+ self.is_downsample = is_downsample
9
+ if is_downsample:
10
+ self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
11
+ else:
12
+ self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
13
+ self.bn1 = nn.BatchNorm2d(c_out)
14
+ self.relu = nn.ReLU(True)
15
+ self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False)
16
+ self.bn2 = nn.BatchNorm2d(c_out)
17
+ if is_downsample:
18
+ self.downsample = nn.Sequential(
19
+ nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
20
+ nn.BatchNorm2d(c_out)
21
+ )
22
+ elif c_in != c_out:
23
+ self.downsample = nn.Sequential(
24
+ nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
25
+ nn.BatchNorm2d(c_out)
26
+ )
27
+ self.is_downsample = True
28
+
29
+ def forward(self,x):
30
+ y = self.conv1(x)
31
+ y = self.bn1(y)
32
+ y = self.relu(y)
33
+ y = self.conv2(y)
34
+ y = self.bn2(y)
35
+ if self.is_downsample:
36
+ x = self.downsample(x)
37
+ return F.relu(x.add(y),True)
38
+
39
+ def make_layers(c_in,c_out,repeat_times, is_downsample=False):
40
+ blocks = []
41
+ for i in range(repeat_times):
42
+ if i ==0:
43
+ blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),]
44
+ else:
45
+ blocks += [BasicBlock(c_out,c_out),]
46
+ return nn.Sequential(*blocks)
47
+
48
+ class Net(nn.Module):
49
+ def __init__(self, num_classes=751 ,reid=False):
50
+ super(Net,self).__init__()
51
+ # 3 128 64
52
+ self.conv = nn.Sequential(
53
+ nn.Conv2d(3,64,3,stride=1,padding=1),
54
+ nn.BatchNorm2d(64),
55
+ nn.ReLU(inplace=True),
56
+ # nn.Conv2d(32,32,3,stride=1,padding=1),
57
+ # nn.BatchNorm2d(32),
58
+ # nn.ReLU(inplace=True),
59
+ nn.MaxPool2d(3,2,padding=1),
60
+ )
61
+ # 32 64 32
62
+ self.layer1 = make_layers(64,64,2,False)
63
+ # 32 64 32
64
+ self.layer2 = make_layers(64,128,2,True)
65
+ # 64 32 16
66
+ self.layer3 = make_layers(128,256,2,True)
67
+ # 128 16 8
68
+ self.layer4 = make_layers(256,512,2,True)
69
+ # 256 8 4
70
+ self.avgpool = nn.AvgPool2d((8,4),1)
71
+ # 256 1 1
72
+ self.reid = reid
73
+ self.classifier = nn.Sequential(
74
+ nn.Linear(512, 256),
75
+ nn.BatchNorm1d(256),
76
+ nn.ReLU(inplace=True),
77
+ nn.Dropout(),
78
+ nn.Linear(256, num_classes),
79
+ )
80
+
81
+ def forward(self, x):
82
+ x = self.conv(x)
83
+ x = self.layer1(x)
84
+ x = self.layer2(x)
85
+ x = self.layer3(x)
86
+ x = self.layer4(x)
87
+ x = self.avgpool(x)
88
+ x = x.view(x.size(0),-1)
89
+ # B x 128
90
+ if self.reid:
91
+ x = x.div(x.norm(p=2,dim=1,keepdim=True))
92
+ return x
93
+ # classifier
94
+ x = self.classifier(x)
95
+ return x
96
+
97
+
98
+ if __name__ == '__main__':
99
+ net = Net()
100
+ x = torch.randn(4,3,128,64)
101
+ y = net(x)
102
+ import ipdb; ipdb.set_trace()
103
+
104
+
deep_sort_torch/deep_sort/deep/original_model.py ADDED
@@ -0,0 +1,106 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ class BasicBlock(nn.Module):
6
+ def __init__(self, c_in, c_out,is_downsample=False):
7
+ super(BasicBlock,self).__init__()
8
+ self.is_downsample = is_downsample
9
+ if is_downsample:
10
+ self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
11
+ else:
12
+ self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
13
+ self.bn1 = nn.BatchNorm2d(c_out)
14
+ self.relu = nn.ReLU(True)
15
+ self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False)
16
+ self.bn2 = nn.BatchNorm2d(c_out)
17
+ if is_downsample:
18
+ self.downsample = nn.Sequential(
19
+ nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
20
+ nn.BatchNorm2d(c_out)
21
+ )
22
+ elif c_in != c_out:
23
+ self.downsample = nn.Sequential(
24
+ nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
25
+ nn.BatchNorm2d(c_out)
26
+ )
27
+ self.is_downsample = True
28
+
29
+ def forward(self,x):
30
+ y = self.conv1(x)
31
+ y = self.bn1(y)
32
+ y = self.relu(y)
33
+ y = self.conv2(y)
34
+ y = self.bn2(y)
35
+ if self.is_downsample:
36
+ x = self.downsample(x)
37
+ return F.relu(x.add(y),True)
38
+
39
+ def make_layers(c_in,c_out,repeat_times, is_downsample=False):
40
+ blocks = []
41
+ for i in range(repeat_times):
42
+ if i ==0:
43
+ blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),]
44
+ else:
45
+ blocks += [BasicBlock(c_out,c_out),]
46
+ return nn.Sequential(*blocks)
47
+
48
+ class Net(nn.Module):
49
+ def __init__(self, num_classes=625 ,reid=False):
50
+ super(Net,self).__init__()
51
+ # 3 128 64
52
+ self.conv = nn.Sequential(
53
+ nn.Conv2d(3,32,3,stride=1,padding=1),
54
+ nn.BatchNorm2d(32),
55
+ nn.ELU(inplace=True),
56
+ nn.Conv2d(32,32,3,stride=1,padding=1),
57
+ nn.BatchNorm2d(32),
58
+ nn.ELU(inplace=True),
59
+ nn.MaxPool2d(3,2,padding=1),
60
+ )
61
+ # 32 64 32
62
+ self.layer1 = make_layers(32,32,2,False)
63
+ # 32 64 32
64
+ self.layer2 = make_layers(32,64,2,True)
65
+ # 64 32 16
66
+ self.layer3 = make_layers(64,128,2,True)
67
+ # 128 16 8
68
+ self.dense = nn.Sequential(
69
+ nn.Dropout(p=0.6),
70
+ nn.Linear(128*16*8, 128),
71
+ nn.BatchNorm1d(128),
72
+ nn.ELU(inplace=True)
73
+ )
74
+ # 256 1 1
75
+ self.reid = reid
76
+ self.batch_norm = nn.BatchNorm1d(128)
77
+ self.classifier = nn.Sequential(
78
+ nn.Linear(128, num_classes),
79
+ )
80
+
81
+ def forward(self, x):
82
+ x = self.conv(x)
83
+ x = self.layer1(x)
84
+ x = self.layer2(x)
85
+ x = self.layer3(x)
86
+
87
+ x = x.view(x.size(0),-1)
88
+ if self.reid:
89
+ x = self.dense[0](x)
90
+ x = self.dense[1](x)
91
+ x = x.div(x.norm(p=2,dim=1,keepdim=True))
92
+ return x
93
+ x = self.dense(x)
94
+ # B x 128
95
+ # classifier
96
+ x = self.classifier(x)
97
+ return x
98
+
99
+
100
+ if __name__ == '__main__':
101
+ net = Net(reid=True)
102
+ x = torch.randn(4,3,128,64)
103
+ y = net(x)
104
+ import ipdb; ipdb.set_trace()
105
+
106
+
deep_sort_torch/deep_sort/deep/test.py ADDED
@@ -0,0 +1,77 @@
1
+ import torch
2
+ import torch.backends.cudnn as cudnn
3
+ import torchvision
4
+
5
+ import argparse
6
+ import os
7
+
8
+ from model import Net
9
+
10
+ parser = argparse.ArgumentParser(description="Train on market1501")
11
+ parser.add_argument("--data-dir",default='data',type=str)
12
+ parser.add_argument("--no-cuda",action="store_true")
13
+ parser.add_argument("--gpu-id",default=0,type=int)
14
+ args = parser.parse_args()
15
+
16
+ # device
17
+ device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
18
+ if torch.cuda.is_available() and not args.no_cuda:
19
+ cudnn.benchmark = True
20
+
21
+ # data loader
22
+ root = args.data_dir
23
+ query_dir = os.path.join(root,"query")
24
+ gallery_dir = os.path.join(root,"gallery")
25
+ transform = torchvision.transforms.Compose([
26
+ torchvision.transforms.Resize((128,64)),
27
+ torchvision.transforms.ToTensor(),
28
+ torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
29
+ ])
30
+ queryloader = torch.utils.data.DataLoader(
31
+ torchvision.datasets.ImageFolder(query_dir, transform=transform),
32
+ batch_size=64, shuffle=False
33
+ )
34
+ galleryloader = torch.utils.data.DataLoader(
35
+ torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
36
+ batch_size=64, shuffle=False
37
+ )
38
+
39
+ # net definition
40
+ net = Net(reid=True)
41
+ assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
42
+ print('Loading from checkpoint/ckpt.t7')
43
+ checkpoint = torch.load("./checkpoint/ckpt.t7")
44
+ net_dict = checkpoint['net_dict']
45
+ net.load_state_dict(net_dict, strict=False)
46
+ net.eval()
47
+ net.to(device)
48
+
49
+ # compute features
50
+ query_features = torch.tensor([]).float()
51
+ query_labels = torch.tensor([]).long()
52
+ gallery_features = torch.tensor([]).float()
53
+ gallery_labels = torch.tensor([]).long()
54
+
55
+ with torch.no_grad():
56
+ for idx,(inputs,labels) in enumerate(queryloader):
57
+ inputs = inputs.to(device)
58
+ features = net(inputs).cpu()
59
+ query_features = torch.cat((query_features, features), dim=0)
60
+ query_labels = torch.cat((query_labels, labels))
61
+
62
+ for idx,(inputs,labels) in enumerate(galleryloader):
63
+ inputs = inputs.to(device)
64
+ features = net(inputs).cpu()
65
+ gallery_features = torch.cat((gallery_features, features), dim=0)
66
+ gallery_labels = torch.cat((gallery_labels, labels))
67
+
68
+ gallery_labels -= 2  # the gallery folder typically contains extra junk/distractor classes; shift labels to align with the query set
69
+
70
+ # save features
71
+ features = {
72
+ "qf": query_features,
73
+ "ql": query_labels,
74
+ "gf": gallery_features,
75
+ "gl": gallery_labels
76
+ }
77
+ torch.save(features,"features.pth")
deep_sort_torch/deep_sort/deep/train.jpg ADDED
deep_sort_torch/deep_sort/deep/train.py ADDED
@@ -0,0 +1,189 @@
1
+ import argparse
2
+ import os
3
+ import time
4
+
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import torch
8
+ import torch.backends.cudnn as cudnn
9
+ import torchvision
10
+
11
+ from model import Net
12
+
13
+ parser = argparse.ArgumentParser(description="Train on market1501")
14
+ parser.add_argument("--data-dir",default='data',type=str)
15
+ parser.add_argument("--no-cuda",action="store_true")
16
+ parser.add_argument("--gpu-id",default=0,type=int)
17
+ parser.add_argument("--lr",default=0.1, type=float)
18
+ parser.add_argument("--interval",'-i',default=20,type=int)
19
+ parser.add_argument('--resume', '-r',action='store_true')
20
+ args = parser.parse_args()
21
+
22
+ # device
23
+ device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
24
+ if torch.cuda.is_available() and not args.no_cuda:
25
+ cudnn.benchmark = True
26
+
27
+ # data loading
28
+ root = args.data_dir
29
+ train_dir = os.path.join(root,"train")
30
+ test_dir = os.path.join(root,"test")
31
+ transform_train = torchvision.transforms.Compose([
32
+ torchvision.transforms.RandomCrop((128,64),padding=4),
33
+ torchvision.transforms.RandomHorizontalFlip(),
34
+ torchvision.transforms.ToTensor(),
35
+ torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
36
+ ])
37
+ transform_test = torchvision.transforms.Compose([
38
+ torchvision.transforms.Resize((128,64)),
39
+ torchvision.transforms.ToTensor(),
40
+ torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
41
+ ])
42
+ trainloader = torch.utils.data.DataLoader(
43
+ torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
44
+ batch_size=64,shuffle=True
45
+ )
46
+ testloader = torch.utils.data.DataLoader(
47
+ torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
48
+ batch_size=64,shuffle=True
49
+ )
50
+ num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes))
51
+
52
+ # net definition
53
+ start_epoch = 0
54
+ net = Net(num_classes=num_classes)
55
+ if args.resume:
56
+ assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
57
+ print('Loading from checkpoint/ckpt.t7')
58
+ checkpoint = torch.load("./checkpoint/ckpt.t7")
59
+ # import ipdb; ipdb.set_trace()
60
+ net_dict = checkpoint['net_dict']
61
+ net.load_state_dict(net_dict)
62
+ best_acc = checkpoint['acc']
63
+ start_epoch = checkpoint['epoch']
64
+ net.to(device)
65
+
66
+ # loss and optimizer
67
+ criterion = torch.nn.CrossEntropyLoss()
68
+ optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)
69
+ best_acc = best_acc if args.resume else 0.  # keep the checkpoint's best accuracy when resuming
70
+
71
+ # train function for each epoch
72
+ def train(epoch):
73
+ print("\nEpoch : %d"%(epoch+1))
74
+ net.train()
75
+ training_loss = 0.
76
+ train_loss = 0.
77
+ correct = 0
78
+ total = 0
79
+ interval = args.interval
80
+ start = time.time()
81
+ for idx, (inputs, labels) in enumerate(trainloader):
82
+ # forward
83
+ inputs,labels = inputs.to(device),labels.to(device)
84
+ outputs = net(inputs)
85
+ loss = criterion(outputs, labels)
86
+
87
+ # backward
88
+ optimizer.zero_grad()
89
+ loss.backward()
90
+ optimizer.step()
91
+
92
+ # accumurating
93
+ training_loss += loss.item()
94
+ train_loss += loss.item()
95
+ correct += outputs.max(dim=1)[1].eq(labels).sum().item()
96
+ total += labels.size(0)
97
+
98
+ # print
99
+ if (idx+1)%interval == 0:
100
+ end = time.time()
101
+ print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
102
+ 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total
103
+ ))
104
+ training_loss = 0.
105
+ start = time.time()
106
+
107
+ return train_loss/len(trainloader), 1.- correct/total
108
+
109
+ def test(epoch):
110
+ global best_acc
111
+ net.eval()
112
+ test_loss = 0.
113
+ correct = 0
114
+ total = 0
115
+ start = time.time()
116
+ with torch.no_grad():
117
+ for idx, (inputs, labels) in enumerate(testloader):
118
+ inputs, labels = inputs.to(device), labels.to(device)
119
+ outputs = net(inputs)
120
+ loss = criterion(outputs, labels)
121
+
122
+ test_loss += loss.item()
123
+ correct += outputs.max(dim=1)[1].eq(labels).sum().item()
124
+ total += labels.size(0)
125
+
126
+ print("Testing ...")
127
+ end = time.time()
128
+ print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
129
+ 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total
130
+ ))
131
+
132
+ # saving checkpoint
133
+ acc = 100.*correct/total
134
+ if acc > best_acc:
135
+ best_acc = acc
136
+ print("Saving parameters to checkpoint/ckpt.t7")
137
+ checkpoint = {
138
+ 'net_dict':net.state_dict(),
139
+ 'acc':acc,
140
+ 'epoch':epoch,
141
+ }
142
+ if not os.path.isdir('checkpoint'):
143
+ os.mkdir('checkpoint')
144
+ torch.save(checkpoint, './checkpoint/ckpt.t7')
145
+
146
+ return test_loss/len(testloader), 1.- correct/total
147
+
148
+ # plot figure
149
+ x_epoch = []
150
+ record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]}
151
+ fig = plt.figure()
152
+ ax0 = fig.add_subplot(121, title="loss")
153
+ ax1 = fig.add_subplot(122, title="top1err")
154
+ def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
155
+ global record
156
+ record['train_loss'].append(train_loss)
157
+ record['train_err'].append(train_err)
158
+ record['test_loss'].append(test_loss)
159
+ record['test_err'].append(test_err)
160
+
161
+ x_epoch.append(epoch)
162
+ ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
163
+ ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
164
+ ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
165
+ ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
166
+ if epoch == 0:
167
+ ax0.legend()
168
+ ax1.legend()
169
+ fig.savefig("train.jpg")
170
+
171
+ # lr decay
172
+ def lr_decay():
173
+ global optimizer
174
+ for params in optimizer.param_groups:
175
+ params['lr'] *= 0.1
176
+ lr = params['lr']
177
+ print("Learning rate adjusted to {}".format(lr))
178
+
179
+ def main():
180
+ for epoch in range(start_epoch, start_epoch+40):
181
+ train_loss, train_err = train(epoch)
182
+ test_loss, test_err = test(epoch)
183
+ draw_curve(epoch, train_loss, train_err, test_loss, test_err)
184
+ if (epoch+1)%20==0:
185
+ lr_decay()
186
+
187
+
188
+ if __name__ == '__main__':
189
+ main()
deep_sort_torch/deep_sort/deep_sort.py ADDED
@@ -0,0 +1,117 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from .deep.feature_extractor import Extractor, FastReIDExtractor
5
+ from .sort.nn_matching import NearestNeighborDistanceMetric
6
+ from .sort.preprocessing import non_max_suppression
7
+ from .sort.detection import Detection
8
+ from .sort.tracker import Tracker
9
+
10
+
11
+ __all__ = ['DeepSort']
12
+
13
+
14
+ class DeepSort(object):
15
+ def __init__(self, model_path, model_config=None, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
16
+ self.min_confidence = min_confidence
17
+ self.nms_max_overlap = nms_max_overlap
18
+
19
+ if model_config is None:
20
+ self.extractor = Extractor(model_path, use_cuda=use_cuda)
21
+ else:
22
+ self.extractor = FastReIDExtractor(model_config, model_path, use_cuda=use_cuda)
23
+
24
+ max_cosine_distance = max_dist
25
+ metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
26
+ self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
27
+
28
+ def update(self, bbox_xywh, confidences, ori_img):
29
+ self.height, self.width = ori_img.shape[:2]
30
+ # generate detections
31
+ features = self._get_features(bbox_xywh, ori_img)
32
+ bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
33
+ detections = [Detection(bbox_tlwh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence]
34
+
35
+ # run on non-maximum supression
36
+ boxes = np.array([d.tlwh for d in detections])
37
+ scores = np.array([d.confidence for d in detections])
38
+ indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
39
+ detections = [detections[i] for i in indices]
40
+
41
+ # update tracker
42
+ self.tracker.predict()
43
+ self.tracker.update(detections)
44
+
45
+ # output bbox identities
46
+ outputs = []
47
+ for track in self.tracker.tracks:
48
+ if not track.is_confirmed() or track.time_since_update > 1:
49
+ continue
50
+ box = track.to_tlwh()
51
+ x1,y1,x2,y2 = self._tlwh_to_xyxy(box)
52
+ track_id = track.track_id
53
+ outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=int))  # np.int is removed in newer NumPy releases
54
+ if len(outputs) > 0:
55
+ outputs = np.stack(outputs,axis=0)
56
+ return outputs
57
+
58
+
59
+ """
60
+ TODO:
61
+ Convert bbox from xc_yc_w_h to xtl_ytl_w_h
62
+ Thanks [email protected] for reporting this bug!
63
+ """
64
+ @staticmethod
65
+ def _xywh_to_tlwh(bbox_xywh):
66
+ if isinstance(bbox_xywh, np.ndarray):
67
+ bbox_tlwh = bbox_xywh.copy()
68
+ elif isinstance(bbox_xywh, torch.Tensor):
69
+ bbox_tlwh = bbox_xywh.clone()
70
+ bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2.
71
+ bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2.
72
+ return bbox_tlwh
73
+
74
+
75
+ def _xywh_to_xyxy(self, bbox_xywh):
76
+ x,y,w,h = bbox_xywh
77
+ x1 = max(int(x-w/2),0)
78
+ x2 = min(int(x+w/2),self.width-1)
79
+ y1 = max(int(y-h/2),0)
80
+ y2 = min(int(y+h/2),self.height-1)
81
+ return x1,y1,x2,y2
82
+
83
+ def _tlwh_to_xyxy(self, bbox_tlwh):
84
+ """
85
+ TODO:
86
+ Convert bbox from xtl_ytl_w_h to xc_yc_w_h
87
+ Thanks [email protected] for reporting this bug!
88
+ """
89
+ x,y,w,h = bbox_tlwh
90
+ x1 = max(int(x),0)
91
+ x2 = min(int(x+w),self.width-1)
92
+ y1 = max(int(y),0)
93
+ y2 = min(int(y+h),self.height-1)
94
+ return x1,y1,x2,y2
95
+
96
+ def _xyxy_to_tlwh(self, bbox_xyxy):
97
+ x1,y1,x2,y2 = bbox_xyxy
98
+
99
+ t = x1
100
+ l = y1
101
+ w = int(x2-x1)
102
+ h = int(y2-y1)
103
+ return t,l,w,h
104
+
105
+ def _get_features(self, bbox_xywh, ori_img):
106
+ im_crops = []
107
+ for box in bbox_xywh:
108
+ x1,y1,x2,y2 = self._xywh_to_xyxy(box)
109
+ im = ori_img[y1:y2,x1:x2]
110
+ im_crops.append(im)
111
+ if im_crops:
112
+ features = self.extractor(im_crops)
113
+ else:
114
+ features = np.array([])
115
+ return features
116
+
117
+
deep_sort_torch/deep_sort/sort/__init__.py ADDED
File without changes
deep_sort_torch/deep_sort/sort/detection.py ADDED
@@ -0,0 +1,49 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+
4
+
5
+ class Detection(object):
6
+ """
7
+ This class represents a bounding box detection in a single image.
8
+
9
+ Parameters
10
+ ----------
11
+ tlwh : array_like
12
+ Bounding box in format `(x, y, w, h)`.
13
+ confidence : float
14
+ Detector confidence score.
15
+ feature : array_like
16
+ A feature vector that describes the object contained in this image.
17
+
18
+ Attributes
19
+ ----------
20
+ tlwh : ndarray
21
+ Bounding box in format `(top left x, top left y, width, height)`.
22
+ confidence : ndarray
23
+ Detector confidence score.
24
+ feature : ndarray | NoneType
25
+ A feature vector that describes the object contained in this image.
26
+
27
+ """
28
+
29
+ def __init__(self, tlwh, confidence, feature):
30
+ self.tlwh = np.asarray(tlwh, dtype=float)  # np.float is removed in newer NumPy releases
31
+ self.confidence = float(confidence)
32
+ self.feature = np.asarray(feature, dtype=np.float32)
33
+
34
+ def to_tlbr(self):
35
+ """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
36
+ `(top left, bottom right)`.
37
+ """
38
+ ret = self.tlwh.copy()
39
+ ret[2:] += ret[:2]
40
+ return ret
41
+
42
+ def to_xyah(self):
43
+ """Convert bounding box to format `(center x, center y, aspect ratio,
44
+ height)`, where the aspect ratio is `width / height`.
45
+ """
46
+ ret = self.tlwh.copy()
47
+ ret[:2] += ret[2:] / 2
48
+ ret[2] /= ret[3]
49
+ return ret
deep_sort_torch/deep_sort/sort/iou_matching.py ADDED
@@ -0,0 +1,81 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ from . import linear_assignment
5
+
6
+
7
+ def iou(bbox, candidates):
8
+ """Computer intersection over union.
9
+
10
+ Parameters
11
+ ----------
12
+ bbox : ndarray
13
+ A bounding box in format `(top left x, top left y, width, height)`.
14
+ candidates : ndarray
15
+ A matrix of candidate bounding boxes (one per row) in the same format
16
+ as `bbox`.
17
+
18
+ Returns
19
+ -------
20
+ ndarray
21
+ The intersection over union in [0, 1] between the `bbox` and each
22
+ candidate. A higher score means a larger fraction of the `bbox` is
23
+ occluded by the candidate.
24
+
25
+ """
26
+ bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27
+ candidates_tl = candidates[:, :2]
28
+ candidates_br = candidates[:, :2] + candidates[:, 2:]
29
+
30
+ tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31
+ np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32
+ br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33
+ np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34
+ wh = np.maximum(0., br - tl)
35
+
36
+ area_intersection = wh.prod(axis=1)
37
+ area_bbox = bbox[2:].prod()
38
+ area_candidates = candidates[:, 2:].prod(axis=1)
39
+ return area_intersection / (area_bbox + area_candidates - area_intersection)
40
+
41
+
42
+ def iou_cost(tracks, detections, track_indices=None,
43
+ detection_indices=None):
44
+ """An intersection over union distance metric.
45
+
46
+ Parameters
47
+ ----------
48
+ tracks : List[deep_sort.track.Track]
49
+ A list of tracks.
50
+ detections : List[deep_sort.detection.Detection]
51
+ A list of detections.
52
+ track_indices : Optional[List[int]]
53
+ A list of indices to tracks that should be matched. Defaults to
54
+ all `tracks`.
55
+ detection_indices : Optional[List[int]]
56
+ A list of indices to detections that should be matched. Defaults
57
+ to all `detections`.
58
+
59
+ Returns
60
+ -------
61
+ ndarray
62
+ Returns a cost matrix of shape
63
+ len(track_indices), len(detection_indices) where entry (i, j) is
64
+ `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65
+
66
+ """
67
+ if track_indices is None:
68
+ track_indices = np.arange(len(tracks))
69
+ if detection_indices is None:
70
+ detection_indices = np.arange(len(detections))
71
+
72
+ cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73
+ for row, track_idx in enumerate(track_indices):
74
+ if tracks[track_idx].time_since_update > 1:
75
+ cost_matrix[row, :] = linear_assignment.INFTY_COST
76
+ continue
77
+
78
+ bbox = tracks[track_idx].to_tlwh()
79
+ candidates = np.asarray([detections[i].tlwh for i in detection_indices])
80
+ cost_matrix[row, :] = 1. - iou(bbox, candidates)
81
+ return cost_matrix
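A quick numeric sanity check of `iou()` above (hedged: the import path assumes the package layout shown in this diff and that the project root is on `sys.path`): a 10x10 box overlapped by a 5x5 patch yields 25 / (100 + 100 - 25) = 1/7 ≈ 0.143.

```python
# Hypothetical check, not part of this commit.
import numpy as np
from deep_sort.sort.iou_matching import iou

bbox = np.array([0., 0., 10., 10.])             # (top-left x, top-left y, w, h)
candidates = np.array([[5., 5., 10., 10.],      # overlaps bbox in a 5x5 region
                       [20., 20., 10., 10.]])   # no overlap
print(iou(bbox, candidates))                    # ~[0.1429, 0.0]
```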
deep_sort_torch/deep_sort/sort/kalman_filter.py ADDED
@@ -0,0 +1,229 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import scipy.linalg
4
+
5
+
6
+ """
7
+ Table for the 0.95 quantile of the chi-square distribution with N degrees of
8
+ freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9
+ function and used as Mahalanobis gating threshold.
10
+ """
11
+ chi2inv95 = {
12
+ 1: 3.8415,
13
+ 2: 5.9915,
14
+ 3: 7.8147,
15
+ 4: 9.4877,
16
+ 5: 11.070,
17
+ 6: 12.592,
18
+ 7: 14.067,
19
+ 8: 15.507,
20
+ 9: 16.919}
21
+
22
+
23
+ class KalmanFilter(object):
24
+ """
25
+ A simple Kalman filter for tracking bounding boxes in image space.
26
+
27
+ The 8-dimensional state space
28
+
29
+ x, y, a, h, vx, vy, va, vh
30
+
31
+ contains the bounding box center position (x, y), aspect ratio a, height h,
32
+ and their respective velocities.
33
+
34
+ Object motion follows a constant velocity model. The bounding box location
35
+ (x, y, a, h) is taken as direct observation of the state space (linear
36
+ observation model).
37
+
38
+ """
39
+
40
+ def __init__(self):
41
+ ndim, dt = 4, 1.
42
+
43
+ # Create Kalman filter model matrices.
44
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45
+ for i in range(ndim):
46
+ self._motion_mat[i, ndim + i] = dt
47
+ self._update_mat = np.eye(ndim, 2 * ndim)
48
+
49
+ # Motion and observation uncertainty are chosen relative to the current
50
+ # state estimate. These weights control the amount of uncertainty in
51
+ # the model. This is a bit hacky.
52
+ self._std_weight_position = 1. / 20
53
+ self._std_weight_velocity = 1. / 160
54
+
55
+ def initiate(self, measurement):
56
+ """Create track from unassociated measurement.
57
+
58
+ Parameters
59
+ ----------
60
+ measurement : ndarray
61
+ Bounding box coordinates (x, y, a, h) with center position (x, y),
62
+ aspect ratio a, and height h.
63
+
64
+ Returns
65
+ -------
66
+ (ndarray, ndarray)
67
+ Returns the mean vector (8 dimensional) and covariance matrix (8x8
68
+ dimensional) of the new track. Unobserved velocities are initialized
69
+ to 0 mean.
70
+
71
+ """
72
+ mean_pos = measurement
73
+ mean_vel = np.zeros_like(mean_pos)
74
+ mean = np.r_[mean_pos, mean_vel]
75
+
76
+ std = [
77
+ 2 * self._std_weight_position * measurement[3],
78
+ 2 * self._std_weight_position * measurement[3],
79
+ 1e-2,
80
+ 2 * self._std_weight_position * measurement[3],
81
+ 10 * self._std_weight_velocity * measurement[3],
82
+ 10 * self._std_weight_velocity * measurement[3],
83
+ 1e-5,
84
+ 10 * self._std_weight_velocity * measurement[3]]
85
+ covariance = np.diag(np.square(std))
86
+ return mean, covariance
87
+
88
+ def predict(self, mean, covariance):
89
+ """Run Kalman filter prediction step.
90
+
91
+ Parameters
92
+ ----------
93
+ mean : ndarray
94
+ The 8 dimensional mean vector of the object state at the previous
95
+ time step.
96
+ covariance : ndarray
97
+ The 8x8 dimensional covariance matrix of the object state at the
98
+ previous time step.
99
+
100
+ Returns
101
+ -------
102
+ (ndarray, ndarray)
103
+ Returns the mean vector and covariance matrix of the predicted
104
+ state. Unobserved velocities are initialized to 0 mean.
105
+
106
+ """
107
+ std_pos = [
108
+ self._std_weight_position * mean[3],
109
+ self._std_weight_position * mean[3],
110
+ 1e-2,
111
+ self._std_weight_position * mean[3]]
112
+ std_vel = [
113
+ self._std_weight_velocity * mean[3],
114
+ self._std_weight_velocity * mean[3],
115
+ 1e-5,
116
+ self._std_weight_velocity * mean[3]]
117
+ motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118
+
119
+ mean = np.dot(self._motion_mat, mean)
120
+ covariance = np.linalg.multi_dot((
121
+ self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122
+
123
+ return mean, covariance
124
+
125
+ def project(self, mean, covariance):
126
+ """Project state distribution to measurement space.
127
+
128
+ Parameters
129
+ ----------
130
+ mean : ndarray
131
+ The state's mean vector (8 dimensional array).
132
+ covariance : ndarray
133
+ The state's covariance matrix (8x8 dimensional).
134
+
135
+ Returns
136
+ -------
137
+ (ndarray, ndarray)
138
+ Returns the projected mean and covariance matrix of the given state
139
+ estimate.
140
+
141
+ """
142
+ std = [
143
+ self._std_weight_position * mean[3],
144
+ self._std_weight_position * mean[3],
145
+ 1e-1,
146
+ self._std_weight_position * mean[3]]
147
+ innovation_cov = np.diag(np.square(std))
148
+
149
+ mean = np.dot(self._update_mat, mean)
150
+ covariance = np.linalg.multi_dot((
151
+ self._update_mat, covariance, self._update_mat.T))
152
+ return mean, covariance + innovation_cov
153
+
154
+ def update(self, mean, covariance, measurement):
155
+ """Run Kalman filter correction step.
156
+
157
+ Parameters
158
+ ----------
159
+ mean : ndarray
160
+ The predicted state's mean vector (8 dimensional).
161
+ covariance : ndarray
162
+ The state's covariance matrix (8x8 dimensional).
163
+ measurement : ndarray
164
+ The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165
+ is the center position, a the aspect ratio, and h the height of the
166
+ bounding box.
167
+
168
+ Returns
169
+ -------
170
+ (ndarray, ndarray)
171
+ Returns the measurement-corrected state distribution.
172
+
173
+ """
174
+ projected_mean, projected_cov = self.project(mean, covariance)
175
+
176
+ chol_factor, lower = scipy.linalg.cho_factor(
177
+ projected_cov, lower=True, check_finite=False)
178
+ kalman_gain = scipy.linalg.cho_solve(
179
+ (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180
+ check_finite=False).T
181
+ innovation = measurement - projected_mean
182
+
183
+ new_mean = mean + np.dot(innovation, kalman_gain.T)
184
+ new_covariance = covariance - np.linalg.multi_dot((
185
+ kalman_gain, projected_cov, kalman_gain.T))
186
+ return new_mean, new_covariance
187
+
188
+ def gating_distance(self, mean, covariance, measurements,
189
+ only_position=False):
190
+ """Compute gating distance between state distribution and measurements.
191
+
192
+ A suitable distance threshold can be obtained from `chi2inv95`. If
193
+ `only_position` is False, the chi-square distribution has 4 degrees of
194
+ freedom, otherwise 2.
195
+
196
+ Parameters
197
+ ----------
198
+ mean : ndarray
199
+ Mean vector over the state distribution (8 dimensional).
200
+ covariance : ndarray
201
+ Covariance of the state distribution (8x8 dimensional).
202
+ measurements : ndarray
203
+ An Nx4 dimensional matrix of N measurements, each in
204
+ format (x, y, a, h) where (x, y) is the bounding box center
205
+ position, a the aspect ratio, and h the height.
206
+ only_position : Optional[bool]
207
+ If True, distance computation is done with respect to the bounding
208
+ box center position only.
209
+
210
+ Returns
211
+ -------
212
+ ndarray
213
+ Returns an array of length N, where the i-th element contains the
214
+ squared Mahalanobis distance between (mean, covariance) and
215
+ `measurements[i]`.
216
+
217
+ """
218
+ mean, covariance = self.project(mean, covariance)
219
+ if only_position:
220
+ mean, covariance = mean[:2], covariance[:2, :2]
221
+ measurements = measurements[:, :2]
222
+
223
+ cholesky_factor = np.linalg.cholesky(covariance)
224
+ d = measurements - mean
225
+ z = scipy.linalg.solve_triangular(
226
+ cholesky_factor, d.T, lower=True, check_finite=False,
227
+ overwrite_b=True)
228
+ squared_maha = np.sum(z * z, axis=0)
229
+ return squared_maha
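For reference, the constant-velocity model implemented above in matrix form (notation mine, matching the 8-dimensional state (x, y, a, h, vx, vy, va, vh) described in the class docstring):

```latex
% Prediction and linear observation model used by KalmanFilter (dt = 1,
% state s = (x, y, a, h, v_x, v_y, v_a, v_h)^T).
\[
s_k = F s_{k-1}, \qquad
F = \begin{pmatrix} I_4 & \Delta t\, I_4 \\ 0 & I_4 \end{pmatrix}, \qquad
z_k = H s_k, \qquad
H = \begin{pmatrix} I_4 & 0 \end{pmatrix},
\]
\[
P_{k|k-1} = F P_{k-1} F^{T} + Q_k, \qquad
d^{2}(z) = (z - H s)^{T} S^{-1} (z - H s), \quad S = H P H^{T} + R,
\]
% Q_k and R are the diagonal motion/observation covariances built from
% _std_weight_position and _std_weight_velocity, and d^2 is the squared
% Mahalanobis gating distance compared against the chi2inv95 thresholds.
```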
deep_sort_torch/deep_sort/sort/linear_assignment.py ADDED
@@ -0,0 +1,192 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ # from sklearn.utils.linear_assignment_ import linear_assignment
5
+ from scipy.optimize import linear_sum_assignment as linear_assignment
6
+ from . import kalman_filter
7
+
8
+
9
+ INFTY_COST = 1e+5
10
+
11
+
12
+ def min_cost_matching(
13
+ distance_metric, max_distance, tracks, detections, track_indices=None,
14
+ detection_indices=None):
15
+ """Solve linear assignment problem.
16
+
17
+ Parameters
18
+ ----------
19
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
20
+ The distance metric is given a list of tracks and detections as well as
21
+ a list of N track indices and M detection indices. The metric should
22
+ return the NxM dimensional cost matrix, where element (i, j) is the
23
+ association cost between the i-th track in the given track indices and
24
+ the j-th detection in the given detection_indices.
25
+ max_distance : float
26
+ Gating threshold. Associations with cost larger than this value are
27
+ disregarded.
28
+ tracks : List[track.Track]
29
+ A list of predicted tracks at the current time step.
30
+ detections : List[detection.Detection]
31
+ A list of detections at the current time step.
32
+ track_indices : List[int]
33
+ List of track indices that maps rows in `cost_matrix` to tracks in
34
+ `tracks` (see description above).
35
+ detection_indices : List[int]
36
+ List of detection indices that maps columns in `cost_matrix` to
37
+ detections in `detections` (see description above).
38
+
39
+ Returns
40
+ -------
41
+ (List[(int, int)], List[int], List[int])
42
+ Returns a tuple with the following three entries:
43
+ * A list of matched track and detection indices.
44
+ * A list of unmatched track indices.
45
+ * A list of unmatched detection indices.
46
+
47
+ """
48
+ if track_indices is None:
49
+ track_indices = np.arange(len(tracks))
50
+ if detection_indices is None:
51
+ detection_indices = np.arange(len(detections))
52
+
53
+ if len(detection_indices) == 0 or len(track_indices) == 0:
54
+ return [], track_indices, detection_indices # Nothing to match.
55
+
56
+ cost_matrix = distance_metric(
57
+ tracks, detections, track_indices, detection_indices)
58
+ cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
59
+
60
+ row_indices, col_indices = linear_assignment(cost_matrix)
61
+
62
+ matches, unmatched_tracks, unmatched_detections = [], [], []
63
+ for col, detection_idx in enumerate(detection_indices):
64
+ if col not in col_indices:
65
+ unmatched_detections.append(detection_idx)
66
+ for row, track_idx in enumerate(track_indices):
67
+ if row not in row_indices:
68
+ unmatched_tracks.append(track_idx)
69
+ for row, col in zip(row_indices, col_indices):
70
+ track_idx = track_indices[row]
71
+ detection_idx = detection_indices[col]
72
+ if cost_matrix[row, col] > max_distance:
73
+ unmatched_tracks.append(track_idx)
74
+ unmatched_detections.append(detection_idx)
75
+ else:
76
+ matches.append((track_idx, detection_idx))
77
+ return matches, unmatched_tracks, unmatched_detections
78
+
79
+
80
+ def matching_cascade(
81
+ distance_metric, max_distance, cascade_depth, tracks, detections,
82
+ track_indices=None, detection_indices=None):
83
+ """Run matching cascade.
84
+
85
+ Parameters
86
+ ----------
87
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
88
+ The distance metric is given a list of tracks and detections as well as
89
+ a list of N track indices and M detection indices. The metric should
90
+ return the NxM dimensional cost matrix, where element (i, j) is the
91
+ association cost between the i-th track in the given track indices and
92
+ the j-th detection in the given detection indices.
93
+ max_distance : float
94
+ Gating threshold. Associations with cost larger than this value are
95
+ disregarded.
96
+ cascade_depth: int
97
+ The cascade depth, which should be set to the maximum track age.
98
+ tracks : List[track.Track]
99
+ A list of predicted tracks at the current time step.
100
+ detections : List[detection.Detection]
101
+ A list of detections at the current time step.
102
+ track_indices : Optional[List[int]]
103
+ List of track indices that maps rows in `cost_matrix` to tracks in
104
+ `tracks` (see description above). Defaults to all tracks.
105
+ detection_indices : Optional[List[int]]
106
+ List of detection indices that maps columns in `cost_matrix` to
107
+ detections in `detections` (see description above). Defaults to all
108
+ detections.
109
+
110
+ Returns
111
+ -------
112
+ (List[(int, int)], List[int], List[int])
113
+ Returns a tuple with the following three entries:
114
+ * A list of matched track and detection indices.
115
+ * A list of unmatched track indices.
116
+ * A list of unmatched detection indices.
117
+
118
+ """
119
+ if track_indices is None:
120
+ track_indices = list(range(len(tracks)))
121
+ if detection_indices is None:
122
+ detection_indices = list(range(len(detections)))
123
+
124
+ unmatched_detections = detection_indices
125
+ matches = []
126
+ for level in range(cascade_depth):
127
+ if len(unmatched_detections) == 0: # No detections left
128
+ break
129
+
130
+ track_indices_l = [
131
+ k for k in track_indices
132
+ if tracks[k].time_since_update == 1 + level
133
+ ]
134
+ if len(track_indices_l) == 0: # Nothing to match at this level
135
+ continue
136
+
137
+ matches_l, _, unmatched_detections = \
138
+ min_cost_matching(
139
+ distance_metric, max_distance, tracks, detections,
140
+ track_indices_l, unmatched_detections)
141
+ matches += matches_l
142
+ unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
143
+ return matches, unmatched_tracks, unmatched_detections
144
+
145
+
146
+ def gate_cost_matrix(
147
+ kf, cost_matrix, tracks, detections, track_indices, detection_indices,
148
+ gated_cost=INFTY_COST, only_position=False):
149
+ """Invalidate infeasible entries in cost matrix based on the state
150
+ distributions obtained by Kalman filtering.
151
+
152
+ Parameters
153
+ ----------
154
+ kf : The Kalman filter.
155
+ cost_matrix : ndarray
156
+ The NxM dimensional cost matrix, where N is the number of track indices
157
+ and M is the number of detection indices, such that entry (i, j) is the
158
+ association cost between `tracks[track_indices[i]]` and
159
+ `detections[detection_indices[j]]`.
160
+ tracks : List[track.Track]
161
+ A list of predicted tracks at the current time step.
162
+ detections : List[detection.Detection]
163
+ A list of detections at the current time step.
164
+ track_indices : List[int]
165
+ List of track indices that maps rows in `cost_matrix` to tracks in
166
+ `tracks` (see description above).
167
+ detection_indices : List[int]
168
+ List of detection indices that maps columns in `cost_matrix` to
169
+ detections in `detections` (see description above).
170
+ gated_cost : Optional[float]
171
+ Entries in the cost matrix corresponding to infeasible associations are
172
+ set this value. Defaults to a very large value.
173
+ only_position : Optional[bool]
174
+ If True, only the x, y position of the state distribution is considered
175
+ during gating. Defaults to False.
176
+
177
+ Returns
178
+ -------
179
+ ndarray
180
+ Returns the modified cost matrix.
181
+
182
+ """
183
+ gating_dim = 2 if only_position else 4
184
+ gating_threshold = kalman_filter.chi2inv95[gating_dim]
185
+ measurements = np.asarray(
186
+ [detections[i].to_xyah() for i in detection_indices])
187
+ for row, track_idx in enumerate(track_indices):
188
+ track = tracks[track_idx]
189
+ gating_distance = kf.gating_distance(
190
+ track.mean, track.covariance, measurements, only_position)
191
+ cost_matrix[row, gating_distance > gating_threshold] = gated_cost
192
+ return cost_matrix
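Note (not part of the commit): a minimal, self-contained sketch of the clamp-then-reject gating pattern used in `min_cost_matching`, assuming the `linear_assignment` call resolves to SciPy's `linear_sum_assignment` (imported earlier in this file); the 2x2 cost matrix is invented for illustration.

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([[0.20, 0.90],
                 [0.80, 0.95]])                    # hypothetical 2 tracks x 2 detections
gated = cost.copy()
gated[gated > max_distance] = max_distance + 1e-5  # same clamp as in min_cost_matching

rows, cols = linear_sum_assignment(gated)          # the solver may still pair clamped entries
matches = [(int(r), int(c)) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]
print(matches)                                     # [(0, 0)] -- the (1, 1) pair exceeds the gate
```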
deep_sort_torch/deep_sort/sort/nn_matching.py ADDED
@@ -0,0 +1,177 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+
4
+
5
+ def _pdist(a, b):
6
+ """Compute pair-wise squared distance between points in `a` and `b`.
7
+
8
+ Parameters
9
+ ----------
10
+ a : array_like
11
+ An NxM matrix of N samples of dimensionality M.
12
+ b : array_like
13
+ An LxM matrix of L samples of dimensionality M.
14
+
15
+ Returns
16
+ -------
17
+ ndarray
18
+ Returns a matrix of size len(a), len(b) such that element (i, j)
19
+ contains the squared distance between `a[i]` and `b[j]`.
20
+
21
+ """
22
+ a, b = np.asarray(a), np.asarray(b)
23
+ if len(a) == 0 or len(b) == 0:
24
+ return np.zeros((len(a), len(b)))
25
+ a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26
+ r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27
+ r2 = np.clip(r2, 0., float(np.inf))
28
+ return r2
29
+
30
+
31
+ def _cosine_distance(a, b, data_is_normalized=False):
32
+ """Compute pair-wise cosine distance between points in `a` and `b`.
33
+
34
+ Parameters
35
+ ----------
36
+ a : array_like
37
+ An NxM matrix of N samples of dimensionality M.
38
+ b : array_like
39
+ An LxM matrix of L samples of dimensionality M.
40
+ data_is_normalized : Optional[bool]
41
+ If True, assumes rows in a and b are unit length vectors.
42
+ Otherwise, a and b are explicitly normalized to length 1.
43
+
44
+ Returns
45
+ -------
46
+ ndarray
47
+ Returns a matrix of size len(a), len(b) such that element (i, j)
48
+ contains the cosine distance between `a[i]` and `b[j]`.
49
+
50
+ """
51
+ if not data_is_normalized:
52
+ a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53
+ b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54
+ return 1. - np.dot(a, b.T)
55
+
56
+
57
+ def _nn_euclidean_distance(x, y):
58
+ """ Helper function for nearest neighbor distance metric (Euclidean).
59
+
60
+ Parameters
61
+ ----------
62
+ x : ndarray
63
+ A matrix of N row-vectors (sample points).
64
+ y : ndarray
65
+ A matrix of M row-vectors (query points).
66
+
67
+ Returns
68
+ -------
69
+ ndarray
70
+ A vector of length M that contains for each entry in `y` the
71
+ smallest Euclidean distance to a sample in `x`.
72
+
73
+ """
74
+ distances = _pdist(x, y)
75
+ return np.maximum(0.0, distances.min(axis=0))
76
+
77
+
78
+ def _nn_cosine_distance(x, y):
79
+ """ Helper function for nearest neighbor distance metric (cosine).
80
+
81
+ Parameters
82
+ ----------
83
+ x : ndarray
84
+ A matrix of N row-vectors (sample points).
85
+ y : ndarray
86
+ A matrix of M row-vectors (query points).
87
+
88
+ Returns
89
+ -------
90
+ ndarray
91
+ A vector of length M that contains for each entry in `y` the
92
+ smallest cosine distance to a sample in `x`.
93
+
94
+ """
95
+ distances = _cosine_distance(x, y)
96
+ return distances.min(axis=0)
97
+
98
+
99
+ class NearestNeighborDistanceMetric(object):
100
+ """
101
+ A nearest neighbor distance metric that, for each target, returns
102
+ the closest distance to any sample that has been observed so far.
103
+
104
+ Parameters
105
+ ----------
106
+ metric : str
107
+ Either "euclidean" or "cosine".
108
+ matching_threshold: float
109
+ The matching threshold. Samples with larger distance are considered an
110
+ invalid match.
111
+ budget : Optional[int]
112
+ If not None, fix samples per class to at most this number. Removes
113
+ the oldest samples when the budget is reached.
114
+
115
+ Attributes
116
+ ----------
117
+ samples : Dict[int -> List[ndarray]]
118
+ A dictionary that maps from target identities to the list of samples
119
+ that have been observed so far.
120
+
121
+ """
122
+
123
+ def __init__(self, metric, matching_threshold, budget=None):
124
+
125
+
126
+ if metric == "euclidean":
127
+ self._metric = _nn_euclidean_distance
128
+ elif metric == "cosine":
129
+ self._metric = _nn_cosine_distance
130
+ else:
131
+ raise ValueError(
132
+ "Invalid metric; must be either 'euclidean' or 'cosine'")
133
+ self.matching_threshold = matching_threshold
134
+ self.budget = budget
135
+ self.samples = {}
136
+
137
+ def partial_fit(self, features, targets, active_targets):
138
+ """Update the distance metric with new data.
139
+
140
+ Parameters
141
+ ----------
142
+ features : ndarray
143
+ An NxM matrix of N features of dimensionality M.
144
+ targets : ndarray
145
+ An integer array of associated target identities.
146
+ active_targets : List[int]
147
+ A list of targets that are currently present in the scene.
148
+
149
+ """
150
+ for feature, target in zip(features, targets):
151
+ self.samples.setdefault(target, []).append(feature)
152
+ if self.budget is not None:
153
+ self.samples[target] = self.samples[target][-self.budget:]
154
+ self.samples = {k: self.samples[k] for k in active_targets}
155
+
156
+ def distance(self, features, targets):
157
+ """Compute distance between features and targets.
158
+
159
+ Parameters
160
+ ----------
161
+ features : ndarray
162
+ An NxM matrix of N features of dimensionality M.
163
+ targets : List[int]
164
+ A list of targets to match the given `features` against.
165
+
166
+ Returns
167
+ -------
168
+ ndarray
169
+ Returns a cost matrix of shape len(targets), len(features), where
170
+ element (i, j) contains the closest distance (under the chosen metric) between
171
+ `targets[i]` and `features[j]`.
172
+
173
+ """
174
+ cost_matrix = np.zeros((len(targets), len(features)))
175
+ for i, target in enumerate(targets):
176
+ cost_matrix[i, :] = self._metric(self.samples[target], features)
177
+ return cost_matrix
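Note (not part of the commit): a minimal usage sketch of `NearestNeighborDistanceMetric`; the 128-dimensional feature vectors and track ids below are invented placeholders.

```python
import numpy as np

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)

# appearance features observed for two hypothetical confirmed tracks
features = np.random.rand(4, 128).astype(np.float32)
targets = np.array([1, 1, 2, 2])               # track id each feature belongs to
metric.partial_fit(features, targets, active_targets=[1, 2])

# query: three new detection features against both tracks
queries = np.random.rand(3, 128).astype(np.float32)
cost = metric.distance(queries, targets=[1, 2])
print(cost.shape)                              # (2, 3): rows are targets, columns are queries
```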
deep_sort_torch/deep_sort/sort/preprocessing.py ADDED
@@ -0,0 +1,73 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import cv2
4
+
5
+
6
+ def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7
+ """Suppress overlapping detections.
8
+
9
+ Original code from [1]_ has been adapted to include confidence score.
10
+
11
+ .. [1] http://www.pyimagesearch.com/2015/02/16/
12
+ faster-non-maximum-suppression-python/
13
+
14
+ Examples
15
+ --------
16
+
17
+ >>> boxes = [d.roi for d in detections]
18
+ >>> scores = [d.confidence for d in detections]
19
+ >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20
+ >>> detections = [detections[i] for i in indices]
21
+
22
+ Parameters
23
+ ----------
24
+ boxes : ndarray
25
+ Array of ROIs (x, y, width, height).
26
+ max_bbox_overlap : float
27
+ ROIs that overlap more than this values are suppressed.
28
+ scores : Optional[array_like]
29
+ Detector confidence score.
30
+
31
+ Returns
32
+ -------
33
+ List[int]
34
+ Returns indices of detections that have survived non-maxima suppression.
35
+
36
+ """
37
+ if len(boxes) == 0:
38
+ return []
39
+
40
+ boxes = boxes.astype(float)  # np.float was removed in recent NumPy releases
41
+ pick = []
42
+
43
+ x1 = boxes[:, 0]
44
+ y1 = boxes[:, 1]
45
+ x2 = boxes[:, 2] + boxes[:, 0]
46
+ y2 = boxes[:, 3] + boxes[:, 1]
47
+
48
+ area = (x2 - x1 + 1) * (y2 - y1 + 1)
49
+ if scores is not None:
50
+ idxs = np.argsort(scores)
51
+ else:
52
+ idxs = np.argsort(y2)
53
+
54
+ while len(idxs) > 0:
55
+ last = len(idxs) - 1
56
+ i = idxs[last]
57
+ pick.append(i)
58
+
59
+ xx1 = np.maximum(x1[i], x1[idxs[:last]])
60
+ yy1 = np.maximum(y1[i], y1[idxs[:last]])
61
+ xx2 = np.minimum(x2[i], x2[idxs[:last]])
62
+ yy2 = np.minimum(y2[i], y2[idxs[:last]])
63
+
64
+ w = np.maximum(0, xx2 - xx1 + 1)
65
+ h = np.maximum(0, yy2 - yy1 + 1)
66
+
67
+ overlap = (w * h) / area[idxs[:last]]
68
+
69
+ idxs = np.delete(
70
+ idxs, np.concatenate(
71
+ ([last], np.where(overlap > max_bbox_overlap)[0])))
72
+
73
+ return pick
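Note (not part of the commit): a quick numeric sketch of `non_max_suppression` with two heavily overlapping `(x, y, w, h)` boxes and one isolated box; the boxes and scores are invented.

```python
import numpy as np

boxes = np.array([[10, 10, 50, 50],
                  [12, 12, 50, 50],      # overlaps box 0 almost completely
                  [200, 200, 40, 40]], dtype=float)
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
print(keep)                              # boxes 0 and 2 survive; box 1 is suppressed by box 0
```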
deep_sort_torch/deep_sort/sort/track.py ADDED
@@ -0,0 +1,166 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+
3
+
4
+ class TrackState:
5
+ """
6
+ Enumeration type for the single target track state. Newly created tracks are
7
+ classified as `tentative` until enough evidence has been collected. Then,
8
+ the track state is changed to `confirmed`. Tracks that are no longer alive
9
+ are classified as `deleted` to mark them for removal from the set of active
10
+ tracks.
11
+
12
+ """
13
+
14
+ Tentative = 1
15
+ Confirmed = 2
16
+ Deleted = 3
17
+
18
+
19
+ class Track:
20
+ """
21
+ A single target track with state space `(x, y, a, h)` and associated
22
+ velocities, where `(x, y)` is the center of the bounding box, `a` is the
23
+ aspect ratio and `h` is the height.
24
+
25
+ Parameters
26
+ ----------
27
+ mean : ndarray
28
+ Mean vector of the initial state distribution.
29
+ covariance : ndarray
30
+ Covariance matrix of the initial state distribution.
31
+ track_id : int
32
+ A unique track identifier.
33
+ n_init : int
34
+ Number of consecutive detections before the track is confirmed. The
35
+ track state is set to `Deleted` if a miss occurs within the first
36
+ `n_init` frames.
37
+ max_age : int
38
+ The maximum number of consecutive misses before the track state is
39
+ set to `Deleted`.
40
+ feature : Optional[ndarray]
41
+ Feature vector of the detection this track originates from. If not None,
42
+ this feature is added to the `features` cache.
43
+
44
+ Attributes
45
+ ----------
46
+ mean : ndarray
47
+ Mean vector of the initial state distribution.
48
+ covariance : ndarray
49
+ Covariance matrix of the initial state distribution.
50
+ track_id : int
51
+ A unique track identifier.
52
+ hits : int
53
+ Total number of measurement updates.
54
+ age : int
55
+ Total number of frames since first occurrence.
56
+ time_since_update : int
57
+ Total number of frames since last measurement update.
58
+ state : TrackState
59
+ The current track state.
60
+ features : List[ndarray]
61
+ A cache of features. On each measurement update, the associated feature
62
+ vector is added to this list.
63
+
64
+ """
65
+
66
+ def __init__(self, mean, covariance, track_id, n_init, max_age,
67
+ feature=None):
68
+ self.mean = mean
69
+ self.covariance = covariance
70
+ self.track_id = track_id
71
+ self.hits = 1
72
+ self.age = 1
73
+ self.time_since_update = 0
74
+
75
+ self.state = TrackState.Tentative
76
+ self.features = []
77
+ if feature is not None:
78
+ self.features.append(feature)
79
+
80
+ self._n_init = n_init
81
+ self._max_age = max_age
82
+
83
+ def to_tlwh(self):
84
+ """Get current position in bounding box format `(top left x, top left y,
85
+ width, height)`.
86
+
87
+ Returns
88
+ -------
89
+ ndarray
90
+ The bounding box.
91
+
92
+ """
93
+ ret = self.mean[:4].copy()
94
+ ret[2] *= ret[3]
95
+ ret[:2] -= ret[2:] / 2
96
+ return ret
97
+
98
+ def to_tlbr(self):
99
+ """Get current position in bounding box format `(min x, min y, max x,
100
+ max y)`.
101
+
102
+ Returns
103
+ -------
104
+ ndarray
105
+ The bounding box.
106
+
107
+ """
108
+ ret = self.to_tlwh()
109
+ ret[2:] = ret[:2] + ret[2:]
110
+ return ret
111
+
112
+ def predict(self, kf):
113
+ """Propagate the state distribution to the current time step using a
114
+ Kalman filter prediction step.
115
+
116
+ Parameters
117
+ ----------
118
+ kf : kalman_filter.KalmanFilter
119
+ The Kalman filter.
120
+
121
+ """
122
+ self.mean, self.covariance = kf.predict(self.mean, self.covariance)
123
+ self.age += 1
124
+ self.time_since_update += 1
125
+
126
+ def update(self, kf, detection):
127
+ """Perform Kalman filter measurement update step and update the feature
128
+ cache.
129
+
130
+ Parameters
131
+ ----------
132
+ kf : kalman_filter.KalmanFilter
133
+ The Kalman filter.
134
+ detection : Detection
135
+ The associated detection.
136
+
137
+ """
138
+ self.mean, self.covariance = kf.update(
139
+ self.mean, self.covariance, detection.to_xyah())
140
+ self.features.append(detection.feature)
141
+
142
+ self.hits += 1
143
+ self.time_since_update = 0
144
+ if self.state == TrackState.Tentative and self.hits >= self._n_init:
145
+ self.state = TrackState.Confirmed
146
+
147
+ def mark_missed(self):
148
+ """Mark this track as missed (no association at the current time step).
149
+ """
150
+ if self.state == TrackState.Tentative:
151
+ self.state = TrackState.Deleted
152
+ elif self.time_since_update > self._max_age:
153
+ self.state = TrackState.Deleted
154
+
155
+ def is_tentative(self):
156
+ """Returns True if this track is tentative (unconfirmed).
157
+ """
158
+ return self.state == TrackState.Tentative
159
+
160
+ def is_confirmed(self):
161
+ """Returns True if this track is confirmed."""
162
+ return self.state == TrackState.Confirmed
163
+
164
+ def is_deleted(self):
165
+ """Returns True if this track is dead and should be deleted."""
166
+ return self.state == TrackState.Deleted
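Note (not part of the commit): a worked sketch of the state-to-box conversion performed by `Track.to_tlwh()`. The Kalman state stores `(center x, center y, aspect ratio a = w/h, height h)`; the box is recovered as `w = a * h` and top-left = center - size / 2. The numbers are invented.

```python
import numpy as np

xyah = np.array([100., 80., 0.5, 40.])   # hypothetical mean[:4]: cx, cy, a, h
tlwh = xyah.copy()
tlwh[2] *= tlwh[3]                       # w = a * h = 20
tlwh[:2] -= tlwh[2:] / 2                 # top-left = (100 - 10, 80 - 20)
print(tlwh)                              # [90. 60. 20. 40.]
```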
deep_sort_torch/deep_sort/sort/tracker.py ADDED
@@ -0,0 +1,138 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ from . import kalman_filter
5
+ from . import linear_assignment
6
+ from . import iou_matching
7
+ from .track import Track
8
+
9
+
10
+ class Tracker:
11
+ """
12
+ This is the multi-target tracker.
13
+
14
+ Parameters
15
+ ----------
16
+ metric : nn_matching.NearestNeighborDistanceMetric
17
+ A distance metric for measurement-to-track association.
18
+ max_age : int
19
+ Maximum number of consecutive misses before a track is deleted.
20
+ n_init : int
21
+ Number of consecutive detections before the track is confirmed. The
22
+ track state is set to `Deleted` if a miss occurs within the first
23
+ `n_init` frames.
24
+
25
+ Attributes
26
+ ----------
27
+ metric : nn_matching.NearestNeighborDistanceMetric
28
+ The distance metric used for measurement to track association.
29
+ max_age : int
30
+ Maximum number of consecutive misses before a track is deleted.
31
+ n_init : int
32
+ Number of frames that a track remains in initialization phase.
33
+ kf : kalman_filter.KalmanFilter
34
+ A Kalman filter to filter target trajectories in image space.
35
+ tracks : List[Track]
36
+ The list of active tracks at the current time step.
37
+
38
+ """
39
+
40
+ def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
41
+ self.metric = metric
42
+ self.max_iou_distance = max_iou_distance
43
+ self.max_age = max_age
44
+ self.n_init = n_init
45
+
46
+ self.kf = kalman_filter.KalmanFilter()
47
+ self.tracks = []
48
+ self._next_id = 1
49
+
50
+ def predict(self):
51
+ """Propagate track state distributions one time step forward.
52
+
53
+ This function should be called once every time step, before `update`.
54
+ """
55
+ for track in self.tracks:
56
+ track.predict(self.kf)
57
+
58
+ def update(self, detections):
59
+ """Perform measurement update and track management.
60
+
61
+ Parameters
62
+ ----------
63
+ detections : List[deep_sort.detection.Detection]
64
+ A list of detections at the current time step.
65
+
66
+ """
67
+ # Run matching cascade.
68
+ matches, unmatched_tracks, unmatched_detections = \
69
+ self._match(detections)
70
+
71
+ # Update track set.
72
+ for track_idx, detection_idx in matches:
73
+ self.tracks[track_idx].update(
74
+ self.kf, detections[detection_idx])
75
+ for track_idx in unmatched_tracks:
76
+ self.tracks[track_idx].mark_missed()
77
+ for detection_idx in unmatched_detections:
78
+ self._initiate_track(detections[detection_idx])
79
+ self.tracks = [t for t in self.tracks if not t.is_deleted()]
80
+
81
+ # Update distance metric.
82
+ active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83
+ features, targets = [], []
84
+ for track in self.tracks:
85
+ if not track.is_confirmed():
86
+ continue
87
+ features += track.features
88
+ targets += [track.track_id for _ in track.features]
89
+ track.features = []
90
+ self.metric.partial_fit(
91
+ np.asarray(features), np.asarray(targets), active_targets)
92
+
93
+ def _match(self, detections):
94
+
95
+ def gated_metric(tracks, dets, track_indices, detection_indices):
96
+ features = np.array([dets[i].feature for i in detection_indices])
97
+ targets = np.array([tracks[i].track_id for i in track_indices])
98
+ cost_matrix = self.metric.distance(features, targets)
99
+ cost_matrix = linear_assignment.gate_cost_matrix(
100
+ self.kf, cost_matrix, tracks, dets, track_indices,
101
+ detection_indices)
102
+
103
+ return cost_matrix
104
+
105
+ # Split track set into confirmed and unconfirmed tracks.
106
+ confirmed_tracks = [
107
+ i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108
+ unconfirmed_tracks = [
109
+ i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110
+
111
+ # Associate confirmed tracks using appearance features.
112
+ matches_a, unmatched_tracks_a, unmatched_detections = \
113
+ linear_assignment.matching_cascade(
114
+ gated_metric, self.metric.matching_threshold, self.max_age,
115
+ self.tracks, detections, confirmed_tracks)
116
+
117
+ # Associate remaining tracks together with unconfirmed tracks using IOU.
118
+ iou_track_candidates = unconfirmed_tracks + [
119
+ k for k in unmatched_tracks_a if
120
+ self.tracks[k].time_since_update == 1]
121
+ unmatched_tracks_a = [
122
+ k for k in unmatched_tracks_a if
123
+ self.tracks[k].time_since_update != 1]
124
+ matches_b, unmatched_tracks_b, unmatched_detections = \
125
+ linear_assignment.min_cost_matching(
126
+ iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127
+ detections, iou_track_candidates, unmatched_detections)
128
+
129
+ matches = matches_a + matches_b
130
+ unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131
+ return matches, unmatched_tracks, unmatched_detections
132
+
133
+ def _initiate_track(self, detection):
134
+ mean, covariance = self.kf.initiate(detection.to_xyah())
135
+ self.tracks.append(Track(
136
+ mean, covariance, self._next_id, self.n_init, self.max_age,
137
+ detection.feature))
138
+ self._next_id += 1
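Note (not part of the commit): a minimal per-frame driving loop for `Tracker`. It assumes the repository root is on `sys.path` and that `Detection` takes `(tlwh, confidence, feature)` as in `deep_sort/sort/detection.py`; the boxes and feature vectors below are invented.

```python
import numpy as np
from deep_sort.sort import nn_matching
from deep_sort.sort.detection import Detection
from deep_sort.sort.tracker import Tracker

metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

# one hypothetical frame with two detections: (top-left x, top-left y, w, h), confidence, feature
detections = [
    Detection(np.array([10., 20., 30., 60.]), 0.9, np.random.rand(128)),
    Detection(np.array([200., 50., 40., 80.]), 0.8, np.random.rand(128)),
]

tracker.predict()             # propagate existing tracks (no-op on the very first frame)
tracker.update(detections)    # associate detections, spawn new tentative tracks
for track in tracker.tracks:
    print(track.track_id, track.to_tlwh(), track.is_confirmed())
```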
deep_sort_torch/deepsort.py ADDED
@@ -0,0 +1,172 @@
1
+ import os
2
+ import cv2
3
+ import time
4
+ import argparse
5
+ import torch
6
+ import warnings
7
+ import numpy as np
8
+ import sys
9
+
10
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'thirdparty/fast-reid'))
11
+
12
+
13
+ from detector import build_detector
14
+ from deep_sort import build_tracker
15
+ from utils.draw import draw_boxes
16
+ from utils.parser import get_config
17
+ from utils.log import get_logger
18
+ from utils.io import write_results
19
+
20
+
21
+
22
+ class VideoTracker(object):
23
+ def __init__(self, cfg, args, video_path):
24
+ self.cfg = cfg
25
+ self.args = args
26
+ self.video_path = video_path
27
+ self.logger = get_logger("root")
28
+
29
+ use_cuda = args.use_cuda and torch.cuda.is_available()
30
+ if not use_cuda:
31
+ warnings.warn("Running in cpu mode which may be very slow!", UserWarning)
32
+
33
+ if args.display:
34
+ cv2.namedWindow("test", cv2.WINDOW_NORMAL)
35
+ cv2.resizeWindow("test", args.display_width, args.display_height)
36
+
37
+ if args.cam != -1:
38
+ print("Using webcam " + str(args.cam))
39
+ self.vdo = cv2.VideoCapture(args.cam)
40
+ else:
41
+ self.vdo = cv2.VideoCapture()
42
+ self.detector = build_detector(cfg, use_cuda=use_cuda)
43
+ self.deepsort = build_tracker(cfg, use_cuda=use_cuda)
44
+ self.class_names = self.detector.class_names
45
+
46
+ def __enter__(self):
47
+ if self.args.cam != -1:
48
+ ret, frame = self.vdo.read()
49
+ assert ret, "Error: Camera error"
50
+ self.im_width = frame.shape[1]  # frame.shape is (height, width, channels)
51
+ self.im_height = frame.shape[0]
52
+
53
+ else:
54
+ assert os.path.isfile(self.video_path), "Path error"
55
+ self.vdo.open(self.video_path)
56
+ self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
57
+ self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
58
+ assert self.vdo.isOpened()
59
+
60
+ if self.args.save_path:
61
+ os.makedirs(self.args.save_path, exist_ok=True)
62
+
63
+ # path of saved video and results
64
+ self.save_video_path = os.path.join(self.args.save_path, "results.avi")
65
+ self.save_results_path = os.path.join(self.args.save_path, "results.txt")
66
+
67
+ # create video writer
68
+ fourcc = cv2.VideoWriter_fourcc(*'MJPG')
69
+ self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 20, (self.im_width, self.im_height))
70
+
71
+ # logging
72
+ self.logger.info("Save results to {}".format(self.args.save_path))
73
+
74
+ return self
75
+
76
+ def __exit__(self, exc_type, exc_value, exc_traceback):
77
+ if exc_type:
78
+ print(exc_type, exc_value, exc_traceback)
79
+
80
+ def run(self):
81
+ results = []
82
+ idx_frame = 0
83
+ while self.vdo.grab():
84
+ idx_frame += 1
85
+ if idx_frame % self.args.frame_interval:
86
+ continue
87
+
88
+ start = time.time()
89
+ _, ori_im = self.vdo.retrieve()
90
+ im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
91
+
92
+ # do detection
93
+ bbox_xywh, cls_conf, cls_ids = self.detector(im)
94
+
95
+ # select person class
96
+ mask = cls_ids == 0
97
+
98
+ bbox_xywh = bbox_xywh[mask]
99
+ # dilate the bbox height slightly (x1.2) in case the box is too tight; remove this line when using a better pedestrian detector
100
+ bbox_xywh[:, 3:] *= 1.2
101
+ cls_conf = cls_conf[mask]
102
+
103
+ # do tracking
104
+ outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
105
+
106
+ # draw boxes for visualization
107
+ if len(outputs) > 0:
108
+ bbox_tlwh = []
109
+ bbox_xyxy = outputs[:, :4]
110
+ identities = outputs[:, -1]
111
+ ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
112
+
113
+ for bb_xyxy in bbox_xyxy:
114
+ bbox_tlwh.append(self.deepsort._xyxy_to_tlwh(bb_xyxy))
115
+
116
+ results.append((idx_frame - 1, bbox_tlwh, identities))
117
+
118
+ end = time.time()
119
+
120
+ if self.args.display:
121
+ cv2.imshow("test", ori_im)
122
+ cv2.waitKey(1)
123
+
124
+ if self.args.save_path:
125
+ self.writer.write(ori_im)
126
+
127
+ # save results
128
+ write_results(self.save_results_path, results, 'mot')
129
+
130
+ # logging
131
+ self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \
132
+ .format(end - start, 1 / (end - start), bbox_xywh.shape[0], len(outputs)))
133
+
134
+
135
+ def parse_args():
136
+ parser = argparse.ArgumentParser()
137
+ parser.add_argument("VIDEO_PATH", type=str)
138
+ parser.add_argument("--config_mmdetection", type=str, default="./configs/mmdet.yaml")
139
+ parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yaml")
140
+ parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
141
+ parser.add_argument("--config_fastreid", type=str, default="./configs/fastreid.yaml")
142
+ parser.add_argument("--fastreid", action="store_true")
143
+ parser.add_argument("--mmdet", action="store_true")
144
+ # parser.add_argument("--ignore_display", dest="display", action="store_false", default=True)
145
+ parser.add_argument("--display", action="store_true")
146
+ parser.add_argument("--frame_interval", type=int, default=1)
147
+ parser.add_argument("--display_width", type=int, default=800)
148
+ parser.add_argument("--display_height", type=int, default=600)
149
+ parser.add_argument("--save_path", type=str, default="./output/")
150
+ parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True)
151
+ parser.add_argument("--camera", action="store", dest="cam", type=int, default=-1)
152
+ return parser.parse_args()
153
+
154
+
155
+ if __name__ == "__main__":
156
+ args = parse_args()
157
+ cfg = get_config()
158
+ if args.mmdet:
159
+ cfg.merge_from_file(args.config_mmdetection)
160
+ cfg.USE_MMDET = True
161
+ else:
162
+ cfg.merge_from_file(args.config_detection)
163
+ cfg.USE_MMDET = False
164
+ cfg.merge_from_file(args.config_deepsort)
165
+ if args.fastreid:
166
+ cfg.merge_from_file(args.config_fastreid)
167
+ cfg.USE_FASTREID = True
168
+ else:
169
+ cfg.USE_FASTREID = False
170
+
171
+ with VideoTracker(cfg, args, video_path=args.VIDEO_PATH) as vdo_trk:
172
+ vdo_trk.run()
deep_sort_torch/detector/MMDet/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .detector import MMDet
2
+ __all__ = ['MMDet']
deep_sort_torch/detector/MMDet/detector.py ADDED
@@ -0,0 +1,55 @@
1
+ import logging
2
+ import numpy as np
3
+ import torch
4
+
5
+ from mmdet.apis import init_detector, inference_detector
6
+ from .mmdet_utils import xyxy_to_xywh
7
+
8
+ class MMDet(object):
9
+ def __init__(self, cfg_file, checkpoint_file, score_thresh=0.7,
10
+ is_xywh=False, use_cuda=True):
11
+ # net definition
12
+ self.device = "cuda" if use_cuda else "cpu"
13
+ self.net = init_detector(cfg_file, checkpoint_file, device=self.device)
14
+ logger = logging.getLogger("root.detector")
15
+ logger.info('Loading weights from %s... Done!' % (checkpoint_file))
16
+
17
+ #constants
18
+ self.score_thresh = score_thresh
19
+ self.use_cuda = use_cuda
20
+ self.is_xywh = is_xywh
21
+ self.class_names = self.net.CLASSES
22
+ self.num_classes = len(self.class_names)
23
+
24
+ def __call__(self, ori_img):
25
+ # forward
26
+ bbox_result = inference_detector(self.net, ori_img)
27
+ bboxes = np.vstack(bbox_result)
28
+
29
+ if len(bboxes) == 0:
30
+ bbox = np.array([]).reshape([0, 4])
31
+ cls_conf = np.array([])
32
+ cls_ids = np.array([])
33
+ return bbox, cls_conf, cls_ids
34
+
35
+ bbox = bboxes[:, :4]
36
+ cls_conf = bboxes[:, 4]
37
+ cls_ids = [
38
+ np.full(bbox.shape[0], i, dtype=np.int32)
39
+ for i, bbox in enumerate(bbox_result)
40
+ ]
41
+ cls_ids = np.concatenate(cls_ids)
42
+
43
+ selected_idx = cls_conf > self.score_thresh
44
+ bbox = bbox[selected_idx, :]
45
+ cls_conf = cls_conf[selected_idx]
46
+ cls_ids = cls_ids[selected_idx]
47
+
48
+ if self.is_xywh:
49
+ bbox = xyxy_to_xywh(bbox)
50
+
51
+ return bbox, cls_conf, cls_ids
52
+
53
+
54
+
55
+
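Note (not part of the commit): a hypothetical usage sketch of the `MMDet` wrapper. The config, checkpoint, and image paths are placeholders, and this only runs where mmdetection and the corresponding weights are installed.

```python
import cv2

detector = MMDet("path/to/mmdet_config.py",        # placeholder mmdetection config
                 "path/to/checkpoint.pth",         # placeholder weights
                 score_thresh=0.5, is_xywh=True, use_cuda=False)

img = cv2.imread("demo.jpg")                       # BGR image, placeholder path
bbox, cls_conf, cls_ids = detector(img)
print(bbox.shape, cls_conf.shape, cls_ids.shape)   # (N, 4), (N,), (N,)
```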
deep_sort_torch/detector/MMDet/mmdet_utils.py ADDED
@@ -0,0 +1,15 @@
1
+ import torch
2
+ import numpy as np
3
+
4
+ def xyxy_to_xywh(boxes_xyxy):
5
+ if isinstance(boxes_xyxy, torch.Tensor):
6
+ boxes_xywh = boxes_xyxy.clone()
7
+ elif isinstance(boxes_xyxy, np.ndarray):
8
+ boxes_xywh = boxes_xyxy.copy()
9
+
10
+ boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2.
11
+ boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2.
12
+ boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]
13
+ boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]
14
+
15
+ return boxes_xywh
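Note (not part of the commit): a quick numeric check of `xyxy_to_xywh` on an invented box. A corner-format box `(10, 20, 50, 100)` becomes a center-format box with center `(30, 60)`, width 40 and height 80.

```python
import numpy as np

print(xyxy_to_xywh(np.array([[10., 20., 50., 100.]])))
# -> center (30, 60), width 40, height 80
```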
deep_sort_torch/detector/YOLOv3/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # YOLOv3 for detection
2
+
3
+ This is an implementation of YOLOv3 with only the forward pass.
4
+
5
+ If you want to train YOLOv3 on your custom dataset, please search `YOLOv3` on GitHub.
6
+
7
+ ## Quick forward
8
+ ```bash
9
+ cd YOLOv3
10
+ python
11
+ ```
deep_sort_torch/detector/YOLOv3/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ import sys
2
+ sys.path.append("detector/YOLOv3")
3
+
4
+
5
+ from .detector import YOLOv3
6
+ __all__ = ['YOLOv3']
7
+
8
+
9
+
deep_sort_torch/detector/YOLOv3/cfg.py ADDED
@@ -0,0 +1,248 @@
1
+ import torch
2
+ from .yolo_utils import convert2cpu
3
+
4
+
5
+ def parse_cfg(cfgfile):
6
+ blocks = []
7
+ fp = open(cfgfile)
8
+ block = None
9
+ line = fp.readline()
10
+ while line != '':
11
+ line = line.rstrip()
12
+ if line == '' or line[0] == '#':
13
+ line = fp.readline()
14
+ continue
15
+ elif line[0] == '[':
16
+ if block:
17
+ blocks.append(block)
18
+ block = dict()
19
+ block['type'] = line.lstrip('[').rstrip(']')
20
+ # set default value
21
+ if block['type'] == 'convolutional':
22
+ block['batch_normalize'] = 0
23
+ else:
24
+ key, value = line.split('=')
25
+ key = key.strip()
26
+ if key == 'type':
27
+ key = '_type'
28
+ value = value.strip()
29
+ block[key] = value
30
+ line = fp.readline()
31
+
32
+ if block:
33
+ blocks.append(block)
34
+ fp.close()
35
+ return blocks
36
+
37
+
38
+ def print_cfg(blocks):
39
+ print('layer filters size input output')
40
+ prev_width = 416
41
+ prev_height = 416
42
+ prev_filters = 3
43
+ out_filters = []
44
+ out_widths = []
45
+ out_heights = []
46
+ ind = -2
47
+ for block in blocks:
48
+ ind += 1
49
+ if block['type'] == 'net':
50
+ prev_width = int(block['width'])
51
+ prev_height = int(block['height'])
52
+ continue
53
+ elif block['type'] == 'convolutional':
54
+ filters = int(block['filters'])
55
+ kernel_size = int(block['size'])
56
+ stride = int(block['stride'])
57
+ is_pad = int(block['pad'])
58
+ pad = (kernel_size - 1) // 2 if is_pad else 0
59
+ width = (prev_width + 2 * pad - kernel_size) // stride + 1
60
+ height = (prev_height + 2 * pad - kernel_size) // stride + 1
61
+ print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
62
+ ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width,
63
+ height, filters))
64
+ prev_width = width
65
+ prev_height = height
66
+ prev_filters = filters
67
+ out_widths.append(prev_width)
68
+ out_heights.append(prev_height)
69
+ out_filters.append(prev_filters)
70
+ elif block['type'] == 'maxpool':
71
+ pool_size = int(block['size'])
72
+ stride = int(block['stride'])
73
+ width = prev_width // stride
74
+ height = prev_height // stride
75
+ print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
76
+ ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, filters))
77
+ prev_width = width
78
+ prev_height = height
79
+ prev_filters = filters
80
+ out_widths.append(prev_width)
81
+ out_heights.append(prev_height)
82
+ out_filters.append(prev_filters)
83
+ elif block['type'] == 'avgpool':
84
+ width = 1
85
+ height = 1
86
+ print('%5d %-6s %3d x %3d x%4d -> %3d' % (
87
+ ind, 'avg', prev_width, prev_height, prev_filters, prev_filters))
88
+ prev_width = width
89
+ prev_height = height
90
+ prev_filters = filters
91
+ out_widths.append(prev_width)
92
+ out_heights.append(prev_height)
93
+ out_filters.append(prev_filters)
94
+ elif block['type'] == 'softmax':
95
+ print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters))
96
+ out_widths.append(prev_width)
97
+ out_heights.append(prev_height)
98
+ out_filters.append(prev_filters)
99
+ elif block['type'] == 'cost':
100
+ print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters))
101
+ out_widths.append(prev_width)
102
+ out_heights.append(prev_height)
103
+ out_filters.append(prev_filters)
104
+ elif block['type'] == 'reorg':
105
+ stride = int(block['stride'])
106
+ filters = stride * stride * prev_filters
107
+ width = prev_width // stride
108
+ height = prev_height // stride
109
+ print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
110
+ ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters))
111
+ prev_width = width
112
+ prev_height = height
113
+ prev_filters = filters
114
+ out_widths.append(prev_width)
115
+ out_heights.append(prev_height)
116
+ out_filters.append(prev_filters)
117
+ elif block['type'] == 'upsample':
118
+ stride = int(block['stride'])
119
+ filters = prev_filters
120
+ width = prev_width * stride
121
+ height = prev_height * stride
122
+ print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
123
+ ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters))
124
+ prev_width = width
125
+ prev_height = height
126
+ prev_filters = filters
127
+ out_widths.append(prev_width)
128
+ out_heights.append(prev_height)
129
+ out_filters.append(prev_filters)
130
+ elif block['type'] == 'route':
131
+ layers = block['layers'].split(',')
132
+ layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
133
+ if len(layers) == 1:
134
+ print('%5d %-6s %d' % (ind, 'route', layers[0]))
135
+ prev_width = out_widths[layers[0]]
136
+ prev_height = out_heights[layers[0]]
137
+ prev_filters = out_filters[layers[0]]
138
+ elif len(layers) == 2:
139
+ print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1]))
140
+ prev_width = out_widths[layers[0]]
141
+ prev_height = out_heights[layers[0]]
142
+ assert (prev_width == out_widths[layers[1]])
143
+ assert (prev_height == out_heights[layers[1]])
144
+ prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
145
+ out_widths.append(prev_width)
146
+ out_heights.append(prev_height)
147
+ out_filters.append(prev_filters)
148
+ elif block['type'] in ['region', 'yolo']:
149
+ print('%5d %-6s' % (ind, 'detection'))
150
+ out_widths.append(prev_width)
151
+ out_heights.append(prev_height)
152
+ out_filters.append(prev_filters)
153
+ elif block['type'] == 'shortcut':
154
+ from_id = int(block['from'])
155
+ from_id = from_id if from_id > 0 else from_id + ind
156
+ print('%5d %-6s %d' % (ind, 'shortcut', from_id))
157
+ prev_width = out_widths[from_id]
158
+ prev_height = out_heights[from_id]
159
+ prev_filters = out_filters[from_id]
160
+ out_widths.append(prev_width)
161
+ out_heights.append(prev_height)
162
+ out_filters.append(prev_filters)
163
+ elif block['type'] == 'connected':
164
+ filters = int(block['output'])
165
+ print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters))
166
+ prev_filters = filters
167
+ out_widths.append(1)
168
+ out_heights.append(1)
169
+ out_filters.append(prev_filters)
170
+ else:
171
+ print('unknown type %s' % (block['type']))
172
+
173
+
174
+ def load_conv(buf, start, conv_model):
175
+ num_w = conv_model.weight.numel()
176
+ num_b = conv_model.bias.numel()
177
+ # print("start: {}, num_w: {}, num_b: {}".format(start, num_w, num_b))
178
+ # by ysyun, use .view_as()
179
+ conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]).view_as(conv_model.bias.data));
180
+ start = start + num_b
181
+ conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data));
182
+ start = start + num_w
183
+ return start
184
+
185
+
186
+ def save_conv(fp, conv_model):
187
+ if conv_model.bias.is_cuda:
188
+ convert2cpu(conv_model.bias.data).numpy().tofile(fp)
189
+ convert2cpu(conv_model.weight.data).numpy().tofile(fp)
190
+ else:
191
+ conv_model.bias.data.numpy().tofile(fp)
192
+ conv_model.weight.data.numpy().tofile(fp)
193
+
194
+
195
+ def load_conv_bn(buf, start, conv_model, bn_model):
196
+ num_w = conv_model.weight.numel()
197
+ num_b = bn_model.bias.numel()
198
+ bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]));
199
+ start = start + num_b
200
+ bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]));
201
+ start = start + num_b
202
+ bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]));
203
+ start = start + num_b
204
+ bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b]));
205
+ start = start + num_b
206
+ # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w
207
+ conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data));
208
+ start = start + num_w
209
+ return start
210
+
211
+
212
+ def save_conv_bn(fp, conv_model, bn_model):
213
+ if bn_model.bias.is_cuda:
214
+ convert2cpu(bn_model.bias.data).numpy().tofile(fp)
215
+ convert2cpu(bn_model.weight.data).numpy().tofile(fp)
216
+ convert2cpu(bn_model.running_mean).numpy().tofile(fp)
217
+ convert2cpu(bn_model.running_var).numpy().tofile(fp)
218
+ convert2cpu(conv_model.weight.data).numpy().tofile(fp)
219
+ else:
220
+ bn_model.bias.data.numpy().tofile(fp)
221
+ bn_model.weight.data.numpy().tofile(fp)
222
+ bn_model.running_mean.numpy().tofile(fp)
223
+ bn_model.running_var.numpy().tofile(fp)
224
+ conv_model.weight.data.numpy().tofile(fp)
225
+
226
+
227
+ def load_fc(buf, start, fc_model):
228
+ num_w = fc_model.weight.numel()
229
+ num_b = fc_model.bias.numel()
230
+ fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]));
231
+ start = start + num_b
232
+ fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]));
233
+ start = start + num_w
234
+ return start
235
+
236
+
237
+ def save_fc(fp, fc_model):
238
+ fc_model.bias.data.numpy().tofile(fp)
239
+ fc_model.weight.data.numpy().tofile(fp)
240
+
241
+
242
+ if __name__ == '__main__':
243
+ import sys
244
+
245
+ blocks = parse_cfg('cfg/yolo.cfg')
246
+ if len(sys.argv) == 2:
247
+ blocks = parse_cfg(sys.argv[1])
248
+ print_cfg(blocks)
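Note (not part of the commit): a small sketch of what `parse_cfg()` returns. Each `[section]` of a darknet cfg becomes a dict with a `type` key plus the raw `key=value` strings; the two-block snippet below is invented.

```python
import tempfile

snippet = """[net]
width=416
height=416

[convolutional]
filters=32
size=3
"""
with tempfile.NamedTemporaryFile("w", suffix=".cfg", delete=False) as f:
    f.write(snippet)
    path = f.name

blocks = parse_cfg(path)
print(blocks[0])  # {'type': 'net', 'width': '416', 'height': '416'}
print(blocks[1])  # {'type': 'convolutional', 'batch_normalize': 0, 'filters': '32', 'size': '3'}
```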
deep_sort_torch/detector/YOLOv3/cfg/coco.data ADDED
@@ -0,0 +1,5 @@
1
+ train = coco_train.txt
2
+ valid = coco_test.txt
3
+ names = data/coco.names
4
+ backup = backup
5
+ gpus = 0,1,2,3
deep_sort_torch/detector/YOLOv3/cfg/coco.names ADDED
@@ -0,0 +1,80 @@
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
deep_sort_torch/detector/YOLOv3/cfg/darknet19_448.cfg ADDED
@@ -0,0 +1,200 @@
1
+ [net]
2
+ batch=128
3
+ subdivisions=4
4
+ height=448
5
+ width=448
6
+ max_crop=512
7
+ channels=3
8
+ momentum=0.9
9
+ decay=0.0005
10
+
11
+ learning_rate=0.001
12
+ policy=poly
13
+ power=4
14
+ max_batches=100000
15
+
16
+ angle=7
17
+ hue = .1
18
+ saturation=.75
19
+ exposure=.75
20
+ aspect=.75
21
+
22
+ [convolutional]
23
+ batch_normalize=1
24
+ filters=32
25
+ size=3
26
+ stride=1
27
+ pad=1
28
+ activation=leaky
29
+
30
+ [maxpool]
31
+ size=2
32
+ stride=2
33
+
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=64
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=leaky
41
+
42
+ [maxpool]
43
+ size=2
44
+ stride=2
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=128
49
+ size=3
50
+ stride=1
51
+ pad=1
52
+ activation=leaky
53
+
54
+ [convolutional]
55
+ batch_normalize=1
56
+ filters=64
57
+ size=1
58
+ stride=1
59
+ pad=1
60
+ activation=leaky
61
+
62
+ [convolutional]
63
+ batch_normalize=1
64
+ filters=128
65
+ size=3
66
+ stride=1
67
+ pad=1
68
+ activation=leaky
69
+
70
+ [maxpool]
71
+ size=2
72
+ stride=2
73
+
74
+ [convolutional]
75
+ batch_normalize=1
76
+ filters=256
77
+ size=3
78
+ stride=1
79
+ pad=1
80
+ activation=leaky
81
+
82
+ [convolutional]
83
+ batch_normalize=1
84
+ filters=128
85
+ size=1
86
+ stride=1
87
+ pad=1
88
+ activation=leaky
89
+
90
+ [convolutional]
91
+ batch_normalize=1
92
+ filters=256
93
+ size=3
94
+ stride=1
95
+ pad=1
96
+ activation=leaky
97
+
98
+ [maxpool]
99
+ size=2
100
+ stride=2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=512
105
+ size=3
106
+ stride=1
107
+ pad=1
108
+ activation=leaky
109
+
110
+ [convolutional]
111
+ batch_normalize=1
112
+ filters=256
113
+ size=1
114
+ stride=1
115
+ pad=1
116
+ activation=leaky
117
+
118
+ [convolutional]
119
+ batch_normalize=1
120
+ filters=512
121
+ size=3
122
+ stride=1
123
+ pad=1
124
+ activation=leaky
125
+
126
+ [convolutional]
127
+ batch_normalize=1
128
+ filters=256
129
+ size=1
130
+ stride=1
131
+ pad=1
132
+ activation=leaky
133
+
134
+ [convolutional]
135
+ batch_normalize=1
136
+ filters=512
137
+ size=3
138
+ stride=1
139
+ pad=1
140
+ activation=leaky
141
+
142
+ [maxpool]
143
+ size=2
144
+ stride=2
145
+
146
+ [convolutional]
147
+ batch_normalize=1
148
+ filters=1024
149
+ size=3
150
+ stride=1
151
+ pad=1
152
+ activation=leaky
153
+
154
+ [convolutional]
155
+ batch_normalize=1
156
+ filters=512
157
+ size=1
158
+ stride=1
159
+ pad=1
160
+ activation=leaky
161
+
162
+ [convolutional]
163
+ batch_normalize=1
164
+ filters=1024
165
+ size=3
166
+ stride=1
167
+ pad=1
168
+ activation=leaky
169
+
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=512
173
+ size=1
174
+ stride=1
175
+ pad=1
176
+ activation=leaky
177
+
178
+ [convolutional]
179
+ batch_normalize=1
180
+ filters=1024
181
+ size=3
182
+ stride=1
183
+ pad=1
184
+ activation=leaky
185
+
186
+ [convolutional]
187
+ filters=1000
188
+ size=1
189
+ stride=1
190
+ pad=1
191
+ activation=linear
192
+
193
+ [avgpool]
194
+
195
+ [softmax]
196
+ groups=1
197
+
198
+ [cost]
199
+ type=sse
200
+
deep_sort_torch/detector/YOLOv3/cfg/tiny-yolo-voc.cfg ADDED
@@ -0,0 +1,134 @@
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=416
5
+ height=416
6
+ channels=3
7
+ momentum=0.9
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.001
15
+ max_batches = 40200
16
+ policy=steps
17
+ steps=-1,100,20000,30000
18
+ scales=.1,10,.1,.1
19
+
20
+ [convolutional]
21
+ batch_normalize=1
22
+ filters=16
23
+ size=3
24
+ stride=1
25
+ pad=1
26
+ activation=leaky
27
+
28
+ [maxpool]
29
+ size=2
30
+ stride=2
31
+
32
+ [convolutional]
33
+ batch_normalize=1
34
+ filters=32
35
+ size=3
36
+ stride=1
37
+ pad=1
38
+ activation=leaky
39
+
40
+ [maxpool]
41
+ size=2
42
+ stride=2
43
+
44
+ [convolutional]
45
+ batch_normalize=1
46
+ filters=64
47
+ size=3
48
+ stride=1
49
+ pad=1
50
+ activation=leaky
51
+
52
+ [maxpool]
53
+ size=2
54
+ stride=2
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=128
59
+ size=3
60
+ stride=1
61
+ pad=1
62
+ activation=leaky
63
+
64
+ [maxpool]
65
+ size=2
66
+ stride=2
67
+
68
+ [convolutional]
69
+ batch_normalize=1
70
+ filters=256
71
+ size=3
72
+ stride=1
73
+ pad=1
74
+ activation=leaky
75
+
76
+ [maxpool]
77
+ size=2
78
+ stride=2
79
+
80
+ [convolutional]
81
+ batch_normalize=1
82
+ filters=512
83
+ size=3
84
+ stride=1
85
+ pad=1
86
+ activation=leaky
87
+
88
+ [maxpool]
89
+ size=2
90
+ stride=1
91
+
92
+ [convolutional]
93
+ batch_normalize=1
94
+ filters=1024
95
+ size=3
96
+ stride=1
97
+ pad=1
98
+ activation=leaky
99
+
100
+ ###########
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ filters=1024
108
+ activation=leaky
109
+
110
+ [convolutional]
111
+ size=1
112
+ stride=1
113
+ pad=1
114
+ filters=125
115
+ activation=linear
116
+
117
+ [region]
118
+ anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52
119
+ bias_match=1
120
+ classes=20
121
+ coords=4
122
+ num=5
123
+ softmax=1
124
+ jitter=.2
125
+ rescore=1
126
+
127
+ object_scale=5
128
+ noobject_scale=1
129
+ class_scale=1
130
+ coord_scale=1
131
+
132
+ absolute=1
133
+ thresh = .6
134
+ random=1
deep_sort_torch/detector/YOLOv3/cfg/tiny-yolo.cfg ADDED
@@ -0,0 +1,140 @@
1
+ [net]
2
+ # Training
3
+ # batch=64
4
+ # subdivisions=2
5
+ # Testing
6
+ batch=1
7
+ subdivisions=1
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=16
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=32
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=64
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [maxpool]
58
+ size=2
59
+ stride=2
60
+
61
+ [convolutional]
62
+ batch_normalize=1
63
+ filters=128
64
+ size=3
65
+ stride=1
66
+ pad=1
67
+ activation=leaky
68
+
69
+ [maxpool]
70
+ size=2
71
+ stride=2
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=256
76
+ size=3
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [maxpool]
82
+ size=2
83
+ stride=2
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=512
88
+ size=3
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [maxpool]
94
+ size=2
95
+ stride=1
96
+
97
+ [convolutional]
98
+ batch_normalize=1
99
+ filters=1024
100
+ size=3
101
+ stride=1
102
+ pad=1
103
+ activation=leaky
104
+
105
+ ###########
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ size=3
110
+ stride=1
111
+ pad=1
112
+ filters=512
113
+ activation=leaky
114
+
115
+ [convolutional]
116
+ size=1
117
+ stride=1
118
+ pad=1
119
+ filters=425
120
+ activation=linear
121
+
122
+ [region]
123
+ anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
124
+ bias_match=1
125
+ classes=80
126
+ coords=4
127
+ num=5
128
+ softmax=1
129
+ jitter=.2
130
+ rescore=0
131
+
132
+ object_scale=5
133
+ noobject_scale=1
134
+ class_scale=1
135
+ coord_scale=1
136
+
137
+ absolute=1
138
+ thresh = .6
139
+ random=1
140
+
deep_sort_torch/detector/YOLOv3/cfg/voc.data ADDED
@@ -0,0 +1,5 @@
1
+ train = data/voc_train.txt
2
+ valid = data/2007_test.txt
3
+ names = data/voc.names
4
+ backup = backup
5
+ gpus = 3
deep_sort_torch/detector/YOLOv3/cfg/voc.names ADDED
@@ -0,0 +1,20 @@
1
+ aeroplane
2
+ bicycle
3
+ bird
4
+ boat
5
+ bottle
6
+ bus
7
+ car
8
+ cat
9
+ chair
10
+ cow
11
+ diningtable
12
+ dog
13
+ horse
14
+ motorbike
15
+ person
16
+ pottedplant
17
+ sheep
18
+ sofa
19
+ train
20
+ tvmonitor
deep_sort_torch/detector/YOLOv3/cfg/voc_gaotie.data ADDED
@@ -0,0 +1,5 @@
1
+ train = data/gaotie_trainval.txt
2
+ valid = data/gaotie_test.txt
3
+ names = data/voc.names
4
+ backup = backup
5
+ gpus = 3
deep_sort_torch/detector/YOLOv3/cfg/yolo-voc.cfg ADDED
@@ -0,0 +1,258 @@
1
+ [net]
2
+ # Testing
3
+ batch=64
4
+ subdivisions=8
5
+ # Training
6
+ # batch=64
7
+ # subdivisions=8
8
+ height=416
9
+ width=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 80200
21
+ policy=steps
22
+ steps=-1,500,40000,60000
23
+ scales=0.1,10,.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=64
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=128
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [convolutional]
58
+ batch_normalize=1
59
+ filters=64
60
+ size=1
61
+ stride=1
62
+ pad=1
63
+ activation=leaky
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=1
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [maxpool]
74
+ size=2
75
+ stride=2
76
+
77
+ [convolutional]
78
+ batch_normalize=1
79
+ filters=256
80
+ size=3
81
+ stride=1
82
+ pad=1
83
+ activation=leaky
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=128
88
+ size=1
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=256
96
+ size=3
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [maxpool]
102
+ size=2
103
+ stride=2
104
+
105
+ [convolutional]
106
+ batch_normalize=1
107
+ filters=512
108
+ size=3
109
+ stride=1
110
+ pad=1
111
+ activation=leaky
112
+
113
+ [convolutional]
114
+ batch_normalize=1
115
+ filters=256
116
+ size=1
117
+ stride=1
118
+ pad=1
119
+ activation=leaky
120
+
121
+ [convolutional]
122
+ batch_normalize=1
123
+ filters=512
124
+ size=3
125
+ stride=1
126
+ pad=1
127
+ activation=leaky
128
+
129
+ [convolutional]
130
+ batch_normalize=1
131
+ filters=256
132
+ size=1
133
+ stride=1
134
+ pad=1
135
+ activation=leaky
136
+
137
+ [convolutional]
138
+ batch_normalize=1
139
+ filters=512
140
+ size=3
141
+ stride=1
142
+ pad=1
143
+ activation=leaky
144
+
145
+ [maxpool]
146
+ size=2
147
+ stride=2
148
+
149
+ [convolutional]
150
+ batch_normalize=1
151
+ filters=1024
152
+ size=3
153
+ stride=1
154
+ pad=1
155
+ activation=leaky
156
+
157
+ [convolutional]
158
+ batch_normalize=1
159
+ filters=512
160
+ size=1
161
+ stride=1
162
+ pad=1
163
+ activation=leaky
164
+
165
+ [convolutional]
166
+ batch_normalize=1
167
+ filters=1024
168
+ size=3
169
+ stride=1
170
+ pad=1
171
+ activation=leaky
172
+
173
+ [convolutional]
174
+ batch_normalize=1
175
+ filters=512
176
+ size=1
177
+ stride=1
178
+ pad=1
179
+ activation=leaky
180
+
181
+ [convolutional]
182
+ batch_normalize=1
183
+ filters=1024
184
+ size=3
185
+ stride=1
186
+ pad=1
187
+ activation=leaky
188
+
189
+
190
+ #######
191
+
192
+ [convolutional]
193
+ batch_normalize=1
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ filters=1024
198
+ activation=leaky
199
+
200
+ [convolutional]
201
+ batch_normalize=1
202
+ size=3
203
+ stride=1
204
+ pad=1
205
+ filters=1024
206
+ activation=leaky
207
+
208
+ [route]
209
+ layers=-9
210
+
211
+ [convolutional]
212
+ batch_normalize=1
213
+ size=1
214
+ stride=1
215
+ pad=1
216
+ filters=64
217
+ activation=leaky
218
+
219
+ [reorg]
220
+ stride=2
221
+
222
+ [route]
223
+ layers=-1,-4
224
+
225
+ [convolutional]
226
+ batch_normalize=1
227
+ size=3
228
+ stride=1
229
+ pad=1
230
+ filters=1024
231
+ activation=leaky
232
+
233
+ [convolutional]
234
+ size=1
235
+ stride=1
236
+ pad=1
237
+ filters=125
238
+ activation=linear
239
+
240
+
241
+ [region]
242
+ anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
243
+ bias_match=1
244
+ classes=20
245
+ coords=4
246
+ num=5
247
+ softmax=1
248
+ jitter=.3
249
+ rescore=1
250
+
251
+ object_scale=5
252
+ noobject_scale=1
253
+ class_scale=1
254
+ coord_scale=1
255
+
256
+ absolute=1
257
+ thresh = .6
258
+ random=1
deep_sort_torch/detector/YOLOv3/cfg/yolo.cfg ADDED
@@ -0,0 +1,258 @@
1
+ [net]
2
+ # Testing
3
+ batch=1
4
+ subdivisions=1
5
+ # Training
6
+ # batch=64
7
+ # subdivisions=8
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=64
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=128
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [convolutional]
58
+ batch_normalize=1
59
+ filters=64
60
+ size=1
61
+ stride=1
62
+ pad=1
63
+ activation=leaky
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=1
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [maxpool]
74
+ size=2
75
+ stride=2
76
+
77
+ [convolutional]
78
+ batch_normalize=1
79
+ filters=256
80
+ size=3
81
+ stride=1
82
+ pad=1
83
+ activation=leaky
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=128
88
+ size=1
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=256
96
+ size=3
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [maxpool]
102
+ size=2
103
+ stride=2
104
+
105
+ [convolutional]
106
+ batch_normalize=1
107
+ filters=512
108
+ size=3
109
+ stride=1
110
+ pad=1
111
+ activation=leaky
112
+
113
+ [convolutional]
114
+ batch_normalize=1
115
+ filters=256
116
+ size=1
117
+ stride=1
118
+ pad=1
119
+ activation=leaky
120
+
121
+ [convolutional]
122
+ batch_normalize=1
123
+ filters=512
124
+ size=3
125
+ stride=1
126
+ pad=1
127
+ activation=leaky
128
+
129
+ [convolutional]
130
+ batch_normalize=1
131
+ filters=256
132
+ size=1
133
+ stride=1
134
+ pad=1
135
+ activation=leaky
136
+
137
+ [convolutional]
138
+ batch_normalize=1
139
+ filters=512
140
+ size=3
141
+ stride=1
142
+ pad=1
143
+ activation=leaky
144
+
145
+ [maxpool]
146
+ size=2
147
+ stride=2
148
+
149
+ [convolutional]
150
+ batch_normalize=1
151
+ filters=1024
152
+ size=3
153
+ stride=1
154
+ pad=1
155
+ activation=leaky
156
+
157
+ [convolutional]
158
+ batch_normalize=1
159
+ filters=512
160
+ size=1
161
+ stride=1
162
+ pad=1
163
+ activation=leaky
164
+
165
+ [convolutional]
166
+ batch_normalize=1
167
+ filters=1024
168
+ size=3
169
+ stride=1
170
+ pad=1
171
+ activation=leaky
172
+
173
+ [convolutional]
174
+ batch_normalize=1
175
+ filters=512
176
+ size=1
177
+ stride=1
178
+ pad=1
179
+ activation=leaky
180
+
181
+ [convolutional]
182
+ batch_normalize=1
183
+ filters=1024
184
+ size=3
185
+ stride=1
186
+ pad=1
187
+ activation=leaky
188
+
189
+
190
+ #######
191
+
192
+ [convolutional]
193
+ batch_normalize=1
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ filters=1024
198
+ activation=leaky
199
+
200
+ [convolutional]
201
+ batch_normalize=1
202
+ size=3
203
+ stride=1
204
+ pad=1
205
+ filters=1024
206
+ activation=leaky
207
+
208
+ [route]
209
+ layers=-9
210
+
211
+ [convolutional]
212
+ batch_normalize=1
213
+ size=1
214
+ stride=1
215
+ pad=1
216
+ filters=64
217
+ activation=leaky
218
+
219
+ [reorg]
220
+ stride=2
221
+
222
+ [route]
223
+ layers=-1,-4
224
+
225
+ [convolutional]
226
+ batch_normalize=1
227
+ size=3
228
+ stride=1
229
+ pad=1
230
+ filters=1024
231
+ activation=leaky
232
+
233
+ [convolutional]
234
+ size=1
235
+ stride=1
236
+ pad=1
237
+ filters=425
238
+ activation=linear
239
+
240
+
241
+ [region]
242
+ anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
243
+ bias_match=1
244
+ classes=80
245
+ coords=4
246
+ num=5
247
+ softmax=1
248
+ jitter=.3
249
+ rescore=1
250
+
251
+ object_scale=5
252
+ noobject_scale=1
253
+ class_scale=1
254
+ coord_scale=1
255
+
256
+ absolute=1
257
+ thresh = .6
258
+ random=1
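
One difference worth keeping in mind between the two [region]-based cfgs above and the [yolo]-based cfgs below is the unit of the anchors: as commonly documented for YOLOv2, [region] anchors are expressed in final-feature-map cells (stride 32 for the 416x416 input used here), while YOLOv3's [yolo] anchors are given in input-image pixels. A small, purely illustrative conversion (not repository code):

stride = 32  # 416x416 input downsampled to a 13x13 grid in this cfg
region_anchors_cells = [(0.57273, 0.677385), (1.87446, 2.06253)]  # first two pairs from yolo.cfg
region_anchors_pixels = [(w * stride, h * stride) for w, h in region_anchors_cells]
print(region_anchors_pixels)  # roughly [(18.3, 21.7), (60.0, 66.0)]
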
deep_sort_torch/detector/YOLOv3/cfg/yolo_v3.cfg ADDED
@@ -0,0 +1,789 @@
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=16
7
+ subdivisions=4
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=20,25
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ # Downsample
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=2
40
+ pad=1
41
+ activation=leaky
42
+
43
+ [convolutional]
44
+ batch_normalize=1
45
+ filters=32
46
+ size=1
47
+ stride=1
48
+ pad=1
49
+ activation=leaky
50
+
51
+ [convolutional]
52
+ batch_normalize=1
53
+ filters=64
54
+ size=3
55
+ stride=1
56
+ pad=1
57
+ activation=leaky
58
+
59
+ [shortcut]
60
+ from=-3
61
+ activation=linear
62
+
63
+ # Downsample
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=2
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=64
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=leaky
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=64
96
+ size=1
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [convolutional]
102
+ batch_normalize=1
103
+ filters=128
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ activation=leaky
108
+
109
+ [shortcut]
110
+ from=-3
111
+ activation=linear
112
+
113
+ # Downsample
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=256
118
+ size=3
119
+ stride=2
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ batch_normalize=1
125
+ filters=128
126
+ size=1
127
+ stride=1
128
+ pad=1
129
+ activation=leaky
130
+
131
+ [convolutional]
132
+ batch_normalize=1
133
+ filters=256
134
+ size=3
135
+ stride=1
136
+ pad=1
137
+ activation=leaky
138
+
139
+ [shortcut]
140
+ from=-3
141
+ activation=linear
142
+
143
+ [convolutional]
144
+ batch_normalize=1
145
+ filters=128
146
+ size=1
147
+ stride=1
148
+ pad=1
149
+ activation=leaky
150
+
151
+ [convolutional]
152
+ batch_normalize=1
153
+ filters=256
154
+ size=3
155
+ stride=1
156
+ pad=1
157
+ activation=leaky
158
+
159
+ [shortcut]
160
+ from=-3
161
+ activation=linear
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=128
166
+ size=1
167
+ stride=1
168
+ pad=1
169
+ activation=leaky
170
+
171
+ [convolutional]
172
+ batch_normalize=1
173
+ filters=256
174
+ size=3
175
+ stride=1
176
+ pad=1
177
+ activation=leaky
178
+
179
+ [shortcut]
180
+ from=-3
181
+ activation=linear
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=128
186
+ size=1
187
+ stride=1
188
+ pad=1
189
+ activation=leaky
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=256
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ activation=leaky
198
+
199
+ [shortcut]
200
+ from=-3
201
+ activation=linear
202
+
203
+
204
+ [convolutional]
205
+ batch_normalize=1
206
+ filters=128
207
+ size=1
208
+ stride=1
209
+ pad=1
210
+ activation=leaky
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=256
215
+ size=3
216
+ stride=1
217
+ pad=1
218
+ activation=leaky
219
+
220
+ [shortcut]
221
+ from=-3
222
+ activation=linear
223
+
224
+ [convolutional]
225
+ batch_normalize=1
226
+ filters=128
227
+ size=1
228
+ stride=1
229
+ pad=1
230
+ activation=leaky
231
+
232
+ [convolutional]
233
+ batch_normalize=1
234
+ filters=256
235
+ size=3
236
+ stride=1
237
+ pad=1
238
+ activation=leaky
239
+
240
+ [shortcut]
241
+ from=-3
242
+ activation=linear
243
+
244
+ [convolutional]
245
+ batch_normalize=1
246
+ filters=128
247
+ size=1
248
+ stride=1
249
+ pad=1
250
+ activation=leaky
251
+
252
+ [convolutional]
253
+ batch_normalize=1
254
+ filters=256
255
+ size=3
256
+ stride=1
257
+ pad=1
258
+ activation=leaky
259
+
260
+ [shortcut]
261
+ from=-3
262
+ activation=linear
263
+
264
+ [convolutional]
265
+ batch_normalize=1
266
+ filters=128
267
+ size=1
268
+ stride=1
269
+ pad=1
270
+ activation=leaky
271
+
272
+ [convolutional]
273
+ batch_normalize=1
274
+ filters=256
275
+ size=3
276
+ stride=1
277
+ pad=1
278
+ activation=leaky
279
+
280
+ [shortcut]
281
+ from=-3
282
+ activation=linear
283
+
284
+ # Downsample
285
+
286
+ [convolutional]
287
+ batch_normalize=1
288
+ filters=512
289
+ size=3
290
+ stride=2
291
+ pad=1
292
+ activation=leaky
293
+
294
+ [convolutional]
295
+ batch_normalize=1
296
+ filters=256
297
+ size=1
298
+ stride=1
299
+ pad=1
300
+ activation=leaky
301
+
302
+ [convolutional]
303
+ batch_normalize=1
304
+ filters=512
305
+ size=3
306
+ stride=1
307
+ pad=1
308
+ activation=leaky
309
+
310
+ [shortcut]
311
+ from=-3
312
+ activation=linear
313
+
314
+
315
+ [convolutional]
316
+ batch_normalize=1
317
+ filters=256
318
+ size=1
319
+ stride=1
320
+ pad=1
321
+ activation=leaky
322
+
323
+ [convolutional]
324
+ batch_normalize=1
325
+ filters=512
326
+ size=3
327
+ stride=1
328
+ pad=1
329
+ activation=leaky
330
+
331
+ [shortcut]
332
+ from=-3
333
+ activation=linear
334
+
335
+
336
+ [convolutional]
337
+ batch_normalize=1
338
+ filters=256
339
+ size=1
340
+ stride=1
341
+ pad=1
342
+ activation=leaky
343
+
344
+ [convolutional]
345
+ batch_normalize=1
346
+ filters=512
347
+ size=3
348
+ stride=1
349
+ pad=1
350
+ activation=leaky
351
+
352
+ [shortcut]
353
+ from=-3
354
+ activation=linear
355
+
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=256
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=leaky
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=512
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=leaky
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=256
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=leaky
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=512
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=leaky
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+
398
+ [convolutional]
399
+ batch_normalize=1
400
+ filters=256
401
+ size=1
402
+ stride=1
403
+ pad=1
404
+ activation=leaky
405
+
406
+ [convolutional]
407
+ batch_normalize=1
408
+ filters=512
409
+ size=3
410
+ stride=1
411
+ pad=1
412
+ activation=leaky
413
+
414
+ [shortcut]
415
+ from=-3
416
+ activation=linear
417
+
418
+
419
+ [convolutional]
420
+ batch_normalize=1
421
+ filters=256
422
+ size=1
423
+ stride=1
424
+ pad=1
425
+ activation=leaky
426
+
427
+ [convolutional]
428
+ batch_normalize=1
429
+ filters=512
430
+ size=3
431
+ stride=1
432
+ pad=1
433
+ activation=leaky
434
+
435
+ [shortcut]
436
+ from=-3
437
+ activation=linear
438
+
439
+ [convolutional]
440
+ batch_normalize=1
441
+ filters=256
442
+ size=1
443
+ stride=1
444
+ pad=1
445
+ activation=leaky
446
+
447
+ [convolutional]
448
+ batch_normalize=1
449
+ filters=512
450
+ size=3
451
+ stride=1
452
+ pad=1
453
+ activation=leaky
454
+
455
+ [shortcut]
456
+ from=-3
457
+ activation=linear
458
+
459
+ # Downsample
460
+
461
+ [convolutional]
462
+ batch_normalize=1
463
+ filters=1024
464
+ size=3
465
+ stride=2
466
+ pad=1
467
+ activation=leaky
468
+
469
+ [convolutional]
470
+ batch_normalize=1
471
+ filters=512
472
+ size=1
473
+ stride=1
474
+ pad=1
475
+ activation=leaky
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=1024
480
+ size=3
481
+ stride=1
482
+ pad=1
483
+ activation=leaky
484
+
485
+ [shortcut]
486
+ from=-3
487
+ activation=linear
488
+
489
+ [convolutional]
490
+ batch_normalize=1
491
+ filters=512
492
+ size=1
493
+ stride=1
494
+ pad=1
495
+ activation=leaky
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=1024
500
+ size=3
501
+ stride=1
502
+ pad=1
503
+ activation=leaky
504
+
505
+ [shortcut]
506
+ from=-3
507
+ activation=linear
508
+
509
+ [convolutional]
510
+ batch_normalize=1
511
+ filters=512
512
+ size=1
513
+ stride=1
514
+ pad=1
515
+ activation=leaky
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=1024
520
+ size=3
521
+ stride=1
522
+ pad=1
523
+ activation=leaky
524
+
525
+ [shortcut]
526
+ from=-3
527
+ activation=linear
528
+
529
+ [convolutional]
530
+ batch_normalize=1
531
+ filters=512
532
+ size=1
533
+ stride=1
534
+ pad=1
535
+ activation=leaky
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=1024
540
+ size=3
541
+ stride=1
542
+ pad=1
543
+ activation=leaky
544
+
545
+ [shortcut]
546
+ from=-3
547
+ activation=linear
548
+
549
+ ######################
550
+
551
+ [convolutional]
552
+ batch_normalize=1
553
+ filters=512
554
+ size=1
555
+ stride=1
556
+ pad=1
557
+ activation=leaky
558
+
559
+ [convolutional]
560
+ batch_normalize=1
561
+ size=3
562
+ stride=1
563
+ pad=1
564
+ filters=1024
565
+ activation=leaky
566
+
567
+ [convolutional]
568
+ batch_normalize=1
569
+ filters=512
570
+ size=1
571
+ stride=1
572
+ pad=1
573
+ activation=leaky
574
+
575
+ [convolutional]
576
+ batch_normalize=1
577
+ size=3
578
+ stride=1
579
+ pad=1
580
+ filters=1024
581
+ activation=leaky
582
+
583
+ [convolutional]
584
+ batch_normalize=1
585
+ filters=512
586
+ size=1
587
+ stride=1
588
+ pad=1
589
+ activation=leaky
590
+
591
+ [convolutional]
592
+ batch_normalize=1
593
+ size=3
594
+ stride=1
595
+ pad=1
596
+ filters=1024
597
+ activation=leaky
598
+
599
+ [convolutional]
600
+ size=1
601
+ stride=1
602
+ pad=1
603
+ filters=255
604
+ activation=linear
605
+
606
+
607
+ [yolo]
608
+ mask = 6,7,8
609
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610
+ classes=80
611
+ num=9
612
+ jitter=.3
613
+ ignore_thresh = .5
614
+ truth_thresh = 1
615
+ random=1
616
+
617
+
618
+ [route]
619
+ layers = -4
620
+
621
+ [convolutional]
622
+ batch_normalize=1
623
+ filters=256
624
+ size=1
625
+ stride=1
626
+ pad=1
627
+ activation=leaky
628
+
629
+ [upsample]
630
+ stride=2
631
+
632
+ [route]
633
+ layers = -1, 61
634
+
635
+
636
+
637
+ [convolutional]
638
+ batch_normalize=1
639
+ filters=256
640
+ size=1
641
+ stride=1
642
+ pad=1
643
+ activation=leaky
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ size=3
648
+ stride=1
649
+ pad=1
650
+ filters=512
651
+ activation=leaky
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=1
657
+ stride=1
658
+ pad=1
659
+ activation=leaky
660
+
661
+ [convolutional]
662
+ batch_normalize=1
663
+ size=3
664
+ stride=1
665
+ pad=1
666
+ filters=512
667
+ activation=leaky
668
+
669
+ [convolutional]
670
+ batch_normalize=1
671
+ filters=256
672
+ size=1
673
+ stride=1
674
+ pad=1
675
+ activation=leaky
676
+
677
+ [convolutional]
678
+ batch_normalize=1
679
+ size=3
680
+ stride=1
681
+ pad=1
682
+ filters=512
683
+ activation=leaky
684
+
685
+ [convolutional]
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ filters=255
690
+ activation=linear
691
+
692
+
693
+ [yolo]
694
+ mask = 3,4,5
695
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696
+ classes=80
697
+ num=9
698
+ jitter=.3
699
+ ignore_thresh = .5
700
+ truth_thresh = 1
701
+ random=1
702
+
703
+
704
+
705
+ [route]
706
+ layers = -4
707
+
708
+ [convolutional]
709
+ batch_normalize=1
710
+ filters=128
711
+ size=1
712
+ stride=1
713
+ pad=1
714
+ activation=leaky
715
+
716
+ [upsample]
717
+ stride=2
718
+
719
+ [route]
720
+ layers = -1, 36
721
+
722
+
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=128
727
+ size=1
728
+ stride=1
729
+ pad=1
730
+ activation=leaky
731
+
732
+ [convolutional]
733
+ batch_normalize=1
734
+ size=3
735
+ stride=1
736
+ pad=1
737
+ filters=256
738
+ activation=leaky
739
+
740
+ [convolutional]
741
+ batch_normalize=1
742
+ filters=128
743
+ size=1
744
+ stride=1
745
+ pad=1
746
+ activation=leaky
747
+
748
+ [convolutional]
749
+ batch_normalize=1
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ filters=256
754
+ activation=leaky
755
+
756
+ [convolutional]
757
+ batch_normalize=1
758
+ filters=128
759
+ size=1
760
+ stride=1
761
+ pad=1
762
+ activation=leaky
763
+
764
+ [convolutional]
765
+ batch_normalize=1
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ filters=256
770
+ activation=leaky
771
+
772
+ [convolutional]
773
+ size=1
774
+ stride=1
775
+ pad=1
776
+ filters=255
777
+ activation=linear
778
+
779
+
780
+ [yolo]
781
+ mask = 0,1,2
782
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783
+ classes=80
784
+ num=9
785
+ jitter=.3
786
+ ignore_thresh = .5
787
+ truth_thresh = 1
788
+ random=1
789
+
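
yolo_v3.cfg defines nine anchors shared by its three [yolo] heads; each head's mask selects the three anchors it is responsible for, from the coarsest 13x13 grid (mask 6,7,8) down to the finest 52x52 grid (mask 0,1,2) at a 416x416 input. A short illustrative sketch (not repository code):

anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
           (116, 90), (156, 198), (373, 326)]
masks = {"13x13 head": [6, 7, 8], "26x26 head": [3, 4, 5], "52x52 head": [0, 1, 2]}
for head, mask in masks.items():
    print(head, [anchors[i] for i in mask])
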
deep_sort_torch/detector/YOLOv3/cfg/yolov3-tiny.cfg ADDED
@@ -0,0 +1,182 @@
1
+ [net]
2
+ # Testing
3
+ batch=1
4
+ subdivisions=1
5
+ # Training
6
+ # batch=64
7
+ # subdivisions=2
8
+ width=416
9
+ height=416
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=16
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ [maxpool]
34
+ size=2
35
+ stride=2
36
+
37
+ [convolutional]
38
+ batch_normalize=1
39
+ filters=32
40
+ size=3
41
+ stride=1
42
+ pad=1
43
+ activation=leaky
44
+
45
+ [maxpool]
46
+ size=2
47
+ stride=2
48
+
49
+ [convolutional]
50
+ batch_normalize=1
51
+ filters=64
52
+ size=3
53
+ stride=1
54
+ pad=1
55
+ activation=leaky
56
+
57
+ [maxpool]
58
+ size=2
59
+ stride=2
60
+
61
+ [convolutional]
62
+ batch_normalize=1
63
+ filters=128
64
+ size=3
65
+ stride=1
66
+ pad=1
67
+ activation=leaky
68
+
69
+ [maxpool]
70
+ size=2
71
+ stride=2
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=256
76
+ size=3
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [maxpool]
82
+ size=2
83
+ stride=2
84
+
85
+ [convolutional]
86
+ batch_normalize=1
87
+ filters=512
88
+ size=3
89
+ stride=1
90
+ pad=1
91
+ activation=leaky
92
+
93
+ [maxpool]
94
+ size=2
95
+ stride=1
96
+
97
+ [convolutional]
98
+ batch_normalize=1
99
+ filters=1024
100
+ size=3
101
+ stride=1
102
+ pad=1
103
+ activation=leaky
104
+
105
+ ###########
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=256
110
+ size=1
111
+ stride=1
112
+ pad=1
113
+ activation=leaky
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=512
118
+ size=3
119
+ stride=1
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ size=1
125
+ stride=1
126
+ pad=1
127
+ filters=255
128
+ activation=linear
129
+
130
+
131
+
132
+ [yolo]
133
+ mask = 3,4,5
134
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135
+ classes=80
136
+ num=6
137
+ jitter=.3
138
+ ignore_thresh = .7
139
+ truth_thresh = 1
140
+ random=1
141
+
142
+ [route]
143
+ layers = -4
144
+
145
+ [convolutional]
146
+ batch_normalize=1
147
+ filters=128
148
+ size=1
149
+ stride=1
150
+ pad=1
151
+ activation=leaky
152
+
153
+ [upsample]
154
+ stride=2
155
+
156
+ [route]
157
+ layers = -1, 8
158
+
159
+ [convolutional]
160
+ batch_normalize=1
161
+ filters=256
162
+ size=3
163
+ stride=1
164
+ pad=1
165
+ activation=leaky
166
+
167
+ [convolutional]
168
+ size=1
169
+ stride=1
170
+ pad=1
171
+ filters=255
172
+ activation=linear
173
+
174
+ [yolo]
175
+ mask = 0,1,2
176
+ anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177
+ classes=80
178
+ num=6
179
+ jitter=.3
180
+ ignore_thresh = .7
181
+ truth_thresh = 1
182
+ random=1
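
All four cfg files added above share the same Darknet syntax: bracketed section headers followed by key=value lines, with '#' starting a comment. In this commit that format is presumably handled by detector/YOLOv3/cfg.py; the stand-alone sketch below is only an assumption-level illustration of the file format, not the project's parser.

def parse_darknet_cfg(path):
    blocks = []
    with open(path) as f:
        for raw in f:
            line = raw.split('#', 1)[0].strip()      # drop comments and surrounding whitespace
            if not line:
                continue
            if line.startswith('[') and line.endswith(']'):
                blocks.append({'type': line[1:-1]})  # start a new section, e.g. 'convolutional'
            else:
                key, value = (part.strip() for part in line.split('=', 1))
                blocks[-1][key] = value
    return blocks

# Example (path assumed relative to the repository root):
# blocks = parse_darknet_cfg('deep_sort_torch/detector/YOLOv3/cfg/yolov3-tiny.cfg')
# blocks[0] -> {'type': 'net', 'batch': '1', 'subdivisions': '1', ...}
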