astoken committed
Commit f517ba8 · unverified · 2 Parent(s): e18e681 121d90b

Merge branch 'master' into advanced_logging

.dockerignore CHANGED
@@ -14,8 +14,10 @@ data/samples/*
 # Neural Network weights -----------------------------------------------------------------------------------------------
 **/*.weights
 **/*.pt
+**/*.pth
 **/*.onnx
 **/*.mlmodel
+**/*.torchscript
 
 
 # Below Copied From .gitignore -----------------------------------------------------------------------------------------
.gitignore CHANGED
@@ -50,6 +50,7 @@ gcp_test*.sh
 *.pt
 *.onnx
 *.mlmodel
+*.torchscript
 darknet53.conv.74
 yolov3-tiny.conv.15
 
README.md CHANGED
@@ -41,9 +41,13 @@ $ pip install -U -r requirements.txt
 ## Tutorials
 
 * [Notebook](https://github.com/ultralytics/yolov5/blob/master/tutorial.ipynb) <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
+* [Kaggle](https://www.kaggle.com/ultralytics/yolov5-tutorial)
 * [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)
-* [Google Cloud Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
-* [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker)
+* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)
+* [ONNX and TorchScript Export](https://github.com/ultralytics/yolov5/issues/251)
+* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303)
+* [Google Cloud Quickstart](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
+* [Docker Quickstart](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker)
 
 
 ## Inference
data/get_coco2017.sh CHANGED
@@ -1,7 +1,11 @@
 #!/bin/bash
-# Zip coco folder
-# zip -r coco.zip coco
-# tar -czvf coco.tar.gz coco
+# COCO 2017 dataset http://cocodataset.org
+# Download command: bash yolov5/data/get_coco2017.sh
+# Train command: python train.py --data ./data/coco.yaml
+# Dataset should be placed next to yolov5 folder:
+#   /parent_folder
+#     /coco
+#     /yolov5
 
 # Download labels from Google Drive, accepting presented query
 filename="coco2017labels.zip"
data/get_voc.sh ADDED
@@ -0,0 +1,214 @@
1
+ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
2
+ # Download command: bash ./data/get_voc.sh
3
+ # Train command: python train.py --data voc.yaml
4
+ # Dataset should be placed next to yolov5 folder:
5
+ # /parent_folder
6
+ # /VOC
7
+ # /yolov5
8
+
9
+ start=`date +%s`
10
+
11
+ # handle optional download dir
12
+ if [ -z "$1" ]
13
+ then
14
+ # navigate to ~/tmp
15
+ echo "navigating to ../tmp/ ..."
16
+ mkdir -p ../tmp
17
+ cd ../tmp/
18
+ else
19
+ # check if is valid directory
20
+ if [ ! -d $1 ]; then
21
+ echo $1 "is not a valid directory"
22
+ exit 0
23
+ fi
24
+ echo "navigating to" $1 "..."
25
+ cd $1
26
+ fi
27
+
28
+ echo "Downloading VOC2007 trainval ..."
29
+ # Download the data.
30
+ curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
31
+ echo "Downloading VOC2007 test data ..."
32
+ curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
33
+ echo "Done downloading."
34
+
35
+ # Extract data
36
+ echo "Extracting trainval ..."
37
+ tar -xf VOCtrainval_06-Nov-2007.tar
38
+ echo "Extracting test ..."
39
+ tar -xf VOCtest_06-Nov-2007.tar
40
+ echo "removing tars ..."
41
+ rm VOCtrainval_06-Nov-2007.tar
42
+ rm VOCtest_06-Nov-2007.tar
43
+
44
+ end=`date +%s`
45
+ runtime=$((end-start))
46
+
47
+ echo "Completed in" $runtime "seconds"
48
+
49
+ start=`date +%s`
50
+
51
+ # handle optional download dir
52
+ if [ -z "$1" ]
53
+ then
54
+ # navigate to ~/tmp
55
+ echo "navigating to ../tmp/ ..."
56
+ mkdir -p ../tmp
57
+ cd ../tmp/
58
+ else
59
+ # check if is valid directory
60
+ if [ ! -d $1 ]; then
61
+ echo $1 "is not a valid directory"
62
+ exit 0
63
+ fi
64
+ echo "navigating to" $1 "..."
65
+ cd $1
66
+ fi
67
+
68
+ echo "Downloading VOC2012 trainval ..."
69
+ # Download the data.
70
+ curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
71
+ echo "Done downloading."
72
+
73
+
74
+ # Extract data
75
+ echo "Extracting trainval ..."
76
+ tar -xf VOCtrainval_11-May-2012.tar
77
+ echo "removing tar ..."
78
+ rm VOCtrainval_11-May-2012.tar
79
+
80
+ end=`date +%s`
81
+ runtime=$((end-start))
82
+
83
+ echo "Completed in" $runtime "seconds"
84
+
85
+ cd ../tmp
86
+ echo "Spliting dataset..."
87
+ python3 - "$@" <<END
88
+ import xml.etree.ElementTree as ET
89
+ import pickle
90
+ import os
91
+ from os import listdir, getcwd
92
+ from os.path import join
93
+
94
+ sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
95
+
96
+ classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
97
+
98
+
99
+ def convert(size, box):
100
+ dw = 1./(size[0])
101
+ dh = 1./(size[1])
102
+ x = (box[0] + box[1])/2.0 - 1
103
+ y = (box[2] + box[3])/2.0 - 1
104
+ w = box[1] - box[0]
105
+ h = box[3] - box[2]
106
+ x = x*dw
107
+ w = w*dw
108
+ y = y*dh
109
+ h = h*dh
110
+ return (x,y,w,h)
111
+
112
+ def convert_annotation(year, image_id):
113
+ in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
114
+ out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
115
+ tree=ET.parse(in_file)
116
+ root = tree.getroot()
117
+ size = root.find('size')
118
+ w = int(size.find('width').text)
119
+ h = int(size.find('height').text)
120
+
121
+ for obj in root.iter('object'):
122
+ difficult = obj.find('difficult').text
123
+ cls = obj.find('name').text
124
+ if cls not in classes or int(difficult)==1:
125
+ continue
126
+ cls_id = classes.index(cls)
127
+ xmlbox = obj.find('bndbox')
128
+ b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
129
+ bb = convert((w,h), b)
130
+ out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
131
+
132
+ wd = getcwd()
133
+
134
+ for year, image_set in sets:
135
+ if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
136
+ os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
137
+ image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
138
+ list_file = open('%s_%s.txt'%(year, image_set), 'w')
139
+ for image_id in image_ids:
140
+ list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
141
+ convert_annotation(year, image_id)
142
+ list_file.close()
143
+
144
+ END
145
+
146
+ cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt
147
+ cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt
148
+
149
+ python3 - "$@" <<END
150
+
151
+ import shutil
152
+ import os
153
+ os.system('mkdir ../VOC/')
154
+ os.system('mkdir ../VOC/images')
155
+ os.system('mkdir ../VOC/images/train')
156
+ os.system('mkdir ../VOC/images/val')
157
+
158
+ os.system('mkdir ../VOC/labels')
159
+ os.system('mkdir ../VOC/labels/train')
160
+ os.system('mkdir ../VOC/labels/val')
161
+
162
+ import os
163
+ print(os.path.exists('../tmp/train.txt'))
164
+ f = open('../tmp/train.txt', 'r')
165
+ lines = f.readlines()
166
+
167
+ for line in lines:
168
+ #print(line.split('/')[-1][:-1])
169
+ line = "/".join(line.split('/')[2:])
170
+ #print(line)
171
+ if (os.path.exists("../" + line[:-1])):
172
+ os.system("cp ../"+ line[:-1] + " ../VOC/images/train")
173
+
174
+ print(os.path.exists('../tmp/train.txt'))
175
+ f = open('../tmp/train.txt', 'r')
176
+ lines = f.readlines()
177
+
178
+ for line in lines:
179
+ #print(line.split('/')[-1][:-1])
180
+ line = "/".join(line.split('/')[2:])
181
+ line = line.replace('JPEGImages', 'labels')
182
+ line = line.replace('jpg', 'txt')
183
+ #print(line)
184
+ if (os.path.exists("../" + line[:-1])):
185
+ os.system("cp ../"+ line[:-1] + " ../VOC/labels/train")
186
+
187
+ print(os.path.exists('../tmp/2007_test.txt'))
188
+ f = open('../tmp/2007_test.txt', 'r')
189
+ lines = f.readlines()
190
+
191
+ for line in lines:
192
+ #print(line.split('/')[-1][:-1])
193
+ line = "/".join(line.split('/')[2:])
194
+
195
+ if (os.path.exists("../" + line[:-1])):
196
+ os.system("cp ../"+ line[:-1] + " ../VOC/images/val")
197
+
198
+ print(os.path.exists('../tmp/2007_test.txt'))
199
+ f = open('../tmp/2007_test.txt', 'r')
200
+ lines = f.readlines()
201
+
202
+ for line in lines:
203
+ #print(line.split('/')[-1][:-1])
204
+ line = "/".join(line.split('/')[2:])
205
+ line = line.replace('JPEGImages', 'labels')
206
+ line = line.replace('jpg', 'txt')
207
+ #print(line)
208
+ if (os.path.exists("../" + line[:-1])):
209
+ os.system("cp ../"+ line[:-1] + " ../VOC/labels/val")
210
+
211
+ END
212
+
213
+ rm -rf ../tmp # remove temporary directory
214
+ echo "VOC download done."
data/voc.yaml ADDED
@@ -0,0 +1,18 @@
+# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
+# Download command: bash ./data/get_voc.sh
+# Train command: python train.py --data voc.yaml
+# Dataset should be placed next to yolov5 folder:
+#   /parent_folder
+#     /VOC
+#     /yolov5
+
+# train and val datasets (image directory or *.txt file with image paths)
+train: ../VOC/images/train/
+val: ../VOC/images/val/
+
+# number of classes
+nc: 20
+
+# class names
+names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
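
The new voc.yaml follows the same data-config schema as coco.yaml, so train.py can consume it directly. A sketch of how the fields are read (not the literal training code):

    import yaml

    with open('data/voc.yaml') as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    print(data_dict['nc'])        # 20
    print(data_dict['names'][0])  # 'aeroplane'
    print(data_dict['train'])     # '../VOC/images/train/'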
detect.py CHANGED
@@ -21,10 +21,8 @@ def detect(save_img=False):
21
 
22
  # Load model
23
  google_utils.attempt_download(weights)
24
- model = torch.load(weights, map_location=device)['model'].float() # load to FP32
25
- # torch.save(torch.load(weights, map_location=device), weights) # update model if SourceChangeWarning
26
- # model.fuse()
27
- model.to(device).eval()
28
  if half:
29
  model.half() # to FP16
30
 
@@ -82,7 +80,7 @@ def detect(save_img=False):
82
  save_path = str(Path(out) / Path(p).name)
83
  txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
84
  s += '%gx%g ' % img.shape[2:] # print string
85
- gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] #  normalization gain whwh
86
  if det is not None and len(det):
87
  # Rescale boxes from img_size to im0 size
88
  det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
@@ -122,10 +120,11 @@ def detect(save_img=False):
122
  if isinstance(vid_writer, cv2.VideoWriter):
123
  vid_writer.release() # release previous video writer
124
 
 
125
  fps = vid_cap.get(cv2.CAP_PROP_FPS)
126
  w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
127
  h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
128
- vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
129
  vid_writer.write(im0)
130
 
131
  if save_txt or save_img:
@@ -144,21 +143,20 @@ if __name__ == '__main__':
144
  parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
145
  parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
146
  parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
147
- parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
148
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
149
  parser.add_argument('--view-img', action='store_true', help='display results')
150
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
151
  parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
152
  parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
153
  parser.add_argument('--augment', action='store_true', help='augmented inference')
 
154
  opt = parser.parse_args()
155
- opt.img_size = check_img_size(opt.img_size)
156
  print(opt)
157
 
158
  with torch.no_grad():
159
- detect()
160
-
161
- # Update all models
162
- # for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
163
- # detect()
164
- # create_pretrained(opt.weights, opt.weights)
 
21
 
22
  # Load model
23
  google_utils.attempt_download(weights)
24
+ model = torch.load(weights, map_location=device)['model'].float().eval() # load FP32 model
25
+ imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
 
 
26
  if half:
27
  model.half() # to FP16
28
 
 
80
  save_path = str(Path(out) / Path(p).name)
81
  txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
82
  s += '%gx%g ' % img.shape[2:] # print string
83
+ gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
84
  if det is not None and len(det):
85
  # Rescale boxes from img_size to im0 size
86
  det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
 
120
  if isinstance(vid_writer, cv2.VideoWriter):
121
  vid_writer.release() # release previous video writer
122
 
123
+ fourcc = 'mp4v' # output video codec
124
  fps = vid_cap.get(cv2.CAP_PROP_FPS)
125
  w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
126
  h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
127
+ vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
128
  vid_writer.write(im0)
129
 
130
  if save_txt or save_img:
 
143
  parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
144
  parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
145
  parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
 
146
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
147
  parser.add_argument('--view-img', action='store_true', help='display results')
148
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
149
  parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
150
  parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
151
  parser.add_argument('--augment', action='store_true', help='augmented inference')
152
+ parser.add_argument('--update', action='store_true', help='update all models')
153
  opt = parser.parse_args()
 
154
  print(opt)
155
 
156
  with torch.no_grad():
157
+ if opt.update: # update all models (to fix SourceChangeWarning)
158
+ for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
159
+ detect()
160
+ create_pretrained(opt.weights, opt.weights)
161
+ else:
162
+ detect()
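
The load path above assumes each *.pt checkpoint is a dict whose 'model' entry is the pickled model itself. A minimal standalone sketch of the same convention (it assumes the yolov5 source is on PYTHONPATH so the pickled classes can be resolved):

    import torch

    ckpt = torch.load('yolov5s.pt', map_location='cpu')  # checkpoint dict
    model = ckpt['model'].float().eval()                 # FP32 model in eval mode
    # img_size is then rounded to a multiple of model.stride.max() before inference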
models/common.py CHANGED
@@ -1,9 +1,15 @@
1
  # This file contains modules common to various models
2
 
3
-
4
  from utils.utils import *
5
 
6
 
7
  def DWConv(c1, c2, k=1, s=1, act=True):
8
  # Depthwise convolution
9
  return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
@@ -11,10 +17,9 @@ def DWConv(c1, c2, k=1, s=1, act=True):
11
 
12
  class Conv(nn.Module):
13
  # Standard convolution
14
- def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
15
  super(Conv, self).__init__()
16
- p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # padding
17
- self.conv = nn.Conv2d(c1, c2, k, s, p, groups=g, bias=False)
18
  self.bn = nn.BatchNorm2d(c2)
19
  self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()
20
 
@@ -46,7 +51,7 @@ class BottleneckCSP(nn.Module):
46
  self.cv1 = Conv(c1, c_, 1, 1)
47
  self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
48
  self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
49
- self.cv4 = Conv(c2, c2, 1, 1)
50
  self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
51
  self.act = nn.LeakyReLU(0.1, inplace=True)
52
  self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
@@ -79,9 +84,9 @@ class Flatten(nn.Module):
79
 
80
  class Focus(nn.Module):
81
  # Focus wh information into c-space
82
- def __init__(self, c1, c2, k=1):
83
  super(Focus, self).__init__()
84
- self.conv = Conv(c1 * 4, c2, k, 1)
85
 
86
  def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
87
  return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
 
1
  # This file contains modules common to various models
2
 
 
3
  from utils.utils import *
4
 
5
 
6
+ def autopad(k, p=None): # kernel, padding
7
+ # Pad to 'same'
8
+ if p is None:
9
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
10
+ return p
11
+
12
+
13
  def DWConv(c1, c2, k=1, s=1, act=True):
14
  # Depthwise convolution
15
  return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
 
17
 
18
  class Conv(nn.Module):
19
  # Standard convolution
20
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
21
  super(Conv, self).__init__()
22
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
 
23
  self.bn = nn.BatchNorm2d(c2)
24
  self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()
25
 
 
51
  self.cv1 = Conv(c1, c_, 1, 1)
52
  self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
53
  self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
54
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
55
  self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
56
  self.act = nn.LeakyReLU(0.1, inplace=True)
57
  self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
 
84
 
85
  class Focus(nn.Module):
86
  # Focus wh information into c-space
87
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
88
  super(Focus, self).__init__()
89
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
90
 
91
  def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
92
  return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
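
The new autopad() helper picks 'same' padding from the kernel size whenever no explicit padding is passed; a couple of worked values (mirroring the definition above):

    def autopad(k, p=None):  # kernel, padding
        if p is None:
            p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
        return p

    print(autopad(3))       # 1  -> a 3x3 conv at stride 1 keeps spatial size
    print(autopad(5))       # 2
    print(autopad((1, 3)))  # [0, 1]  -> per-dimension padding for asymmetric kernels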
models/experimental.py CHANGED
@@ -1,6 +1,41 @@
 
 
1
  from models.common import *
2
 
3
 
4
  class Sum(nn.Module):
5
  # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
6
  def __init__(self, n, weight=False): # n: number of inputs
@@ -50,17 +85,6 @@ class GhostBottleneck(nn.Module):
50
  return self.conv(x) + self.shortcut(x)
51
 
52
 
53
- class ConvPlus(nn.Module):
54
- # Plus-shaped convolution
55
- def __init__(self, c1, c2, k=3, s=1, g=1, bias=True): # ch_in, ch_out, kernel, stride, groups
56
- super(ConvPlus, self).__init__()
57
- self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias)
58
- self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias)
59
-
60
- def forward(self, x):
61
- return self.cv1(x) + self.cv2(x)
62
-
63
-
64
  class MixConv2d(nn.Module):
65
  # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
66
  def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
@@ -83,3 +107,15 @@ class MixConv2d(nn.Module):
83
 
84
  def forward(self, x):
85
  return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
1
+ # This file contains experimental modules
2
+
3
  from models.common import *
4
 
5
 
6
+ class CrossConv(nn.Module):
7
+ # Cross Convolution Downsample
8
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
9
+ # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
10
+ super(CrossConv, self).__init__()
11
+ c_ = int(c2 * e) # hidden channels
12
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
13
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
14
+ self.add = shortcut and c1 == c2
15
+
16
+ def forward(self, x):
17
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
18
+
19
+
20
+ class C3(nn.Module):
21
+ # Cross Convolution CSP
22
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
23
+ super(C3, self).__init__()
24
+ c_ = int(c2 * e) # hidden channels
25
+ self.cv1 = Conv(c1, c_, 1, 1)
26
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
27
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
28
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
29
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
30
+ self.act = nn.LeakyReLU(0.1, inplace=True)
31
+ self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
32
+
33
+ def forward(self, x):
34
+ y1 = self.cv3(self.m(self.cv1(x)))
35
+ y2 = self.cv2(x)
36
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
37
+
38
+
39
  class Sum(nn.Module):
40
  # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
41
  def __init__(self, n, weight=False): # n: number of inputs
 
85
  return self.conv(x) + self.shortcut(x)
86
 
87
 
88
  class MixConv2d(nn.Module):
89
  # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
90
  def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
 
107
 
108
  def forward(self, x):
109
  return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
110
+
111
+
112
+ class Ensemble(nn.ModuleList):
113
+ # Ensemble of models
114
+ def __init__(self):
115
+ super(Ensemble, self).__init__()
116
+
117
+ def forward(self, x, augment=False):
118
+ y = []
119
+ for module in self:
120
+ y.append(module(x, augment)[0])
121
+ return torch.cat(y, 1), None # ensembled inference output, train output
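
A minimal sketch of how the new Ensemble container is intended to be used; model_a and model_b are placeholders for already-loaded YOLOv5 models, each returning (inference_output, train_output) from forward():

    ensemble = Ensemble()
    ensemble.append(model_a)
    ensemble.append(model_b)
    pred, _ = ensemble(img)  # member outputs concatenated along dim 1; NMS then merges them downstream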
models/export.py ADDED
@@ -0,0 +1,72 @@
+"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
+
+Usage:
+    $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
+"""
+
+import argparse
+
+from models.common import *
+from utils import google_utils
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    opt = parser.parse_args()
+    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
+    print(opt)
+
+    # Input
+    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection
+
+    # Load PyTorch model
+    google_utils.attempt_download(opt.weights)
+    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
+    model.eval()
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # TorchScript export
+    try:
+        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
+        f = opt.weights.replace('.pt', '.torchscript')  # filename
+        ts = torch.jit.trace(model, img)
+        ts.save(f)
+        print('TorchScript export success, saved as %s' % f)
+    except Exception as e:
+        print('TorchScript export failure: %s' % e)
+
+    # ONNX export
+    try:
+        import onnx
+
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        f = opt.weights.replace('.pt', '.onnx')  # filename
+        model.fuse()  # only for ONNX
+        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                          output_names=['classes', 'boxes'] if y is None else ['output'])
+
+        # Checks
+        onnx_model = onnx.load(f)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % f)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+    # CoreML export
+    try:
+        import coremltools as ct
+
+        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
+        model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape)])  # convert
+        f = opt.weights.replace('.pt', '.mlmodel')  # filename
+        model.save(f)
+        print('CoreML export success, saved as %s' % f)
+    except Exception as e:
+        print('CoreML export failure: %s' % e)
+
+    # Finish
+    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
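
Once the script above has written yolov5s.torchscript, the traced model can be reloaded without the Python model definition. A rough sketch, assuming the default --weights name and the same input shape used for tracing:

    import torch

    model = torch.jit.load('yolov5s.torchscript', map_location='cpu')
    img = torch.zeros(1, 3, 640, 640)  # shape must match the traced input
    pred = model(img)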
models/onnx_export.py DELETED
@@ -1,42 +0,0 @@
-"""Exports a pytorch *.pt model to *.onnx format
-
-Usage:
-    $ export PYTHONPATH="$PWD" && python models/onnx_export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
-"""
-
-import argparse
-
-import onnx
-
-from models.common import *
-from utils import google_utils
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
-    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
-    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
-    opt = parser.parse_args()
-    print(opt)
-
-    # Parameters
-    f = opt.weights.replace('.pt', '.onnx')  # onnx filename
-    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size, (1, 3, 320, 192) iDetection
-
-    # Load pytorch model
-    google_utils.attempt_download(opt.weights)
-    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
-    model.eval()
-    model.fuse()
-
-    # Export to onnx
-    model.model[-1].export = True  # set Detect() layer export=True
-    _ = model(img)  # dry run
-    torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'],
-                      output_names=['output'])  # output_names=['classes', 'boxes']
-
-    # Check onnx model
-    model = onnx.load(f)  # load onnx model
-    onnx.checker.check_model(model)  # check onnx model
-    print(onnx.helper.printable_graph(model.graph))  # print a human readable representation of the graph
-    print('Export complete. ONNX model saved to %s\nView with https://github.com/lutzroeder/netron' % f)
models/yolo.py CHANGED
@@ -48,21 +48,27 @@ class Model(nn.Module):
48
  if type(model_cfg) is dict:
49
  self.md = model_cfg # model dict
50
  else: # is *.yaml
 
51
  with open(model_cfg) as f:
52
  self.md = yaml.load(f, Loader=yaml.FullLoader) # model dict
53
 
54
  # Define model
55
- if nc:
 
56
  self.md['nc'] = nc # override yaml value
57
  self.model, self.save = parse_model(self.md, ch=[ch]) # model, savelist, ch_out
58
  # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
59
 
60
  # Build strides, anchors
61
  m = self.model[-1] # Detect()
62
- m.stride = torch.tensor([128 / x.shape[-2] for x in self.forward(torch.zeros(1, ch, 128, 128))]) # forward
63
- m.anchors /= m.stride.view(-1, 1, 1)
64
- check_anchor_order(m)
65
- self.stride = m.stride
 
 
 
 
66
 
67
  # Init weights, biases
68
  torch_utils.initialize_weights(self)
@@ -136,17 +142,17 @@ class Model(nn.Module):
136
  # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
137
 
138
  def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
139
- print('Fusing layers...')
140
  for m in self.model.modules():
141
  if type(m) is Conv:
142
  m.conv = torch_utils.fuse_conv_and_bn(m.conv, m.bn) # update conv
143
  m.bn = None # remove batchnorm
144
  m.forward = m.fuseforward # update forward
145
  torch_utils.model_info(self)
146
-
147
 
148
  def parse_model(md, ch): # model_dict, input_channels(3)
149
- print('\n%3s%15s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
150
  anchors, nc, gd, gw = md['anchors'], md['nc'], md['depth_multiple'], md['width_multiple']
151
  na = (len(anchors[0]) // 2) # number of anchors
152
  no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
@@ -161,7 +167,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
161
  pass
162
 
163
  n = max(round(n * gd), 1) if n > 1 else n # depth gain
164
- if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, ConvPlus, BottleneckCSP]:
165
  c1, c2 = ch[f], args[0]
166
 
167
  # Normal
@@ -182,7 +188,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
182
  # c2 = make_divisible(c2, 8) if c2 != no else c2
183
 
184
  args = [c1, c2, *args[1:]]
185
- if m is BottleneckCSP:
186
  args.insert(2, n)
187
  n = 1
188
  elif m is nn.BatchNorm2d:
@@ -198,7 +204,7 @@ def parse_model(md, ch): # model_dict, input_channels(3)
198
  t = str(m)[8:-2].replace('__main__.', '') # module type
199
  np = sum([x.numel() for x in m_.parameters()]) # number params
200
  m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
201
- print('%3s%15s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
202
  save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
203
  layers.append(m_)
204
  ch.append(c2)
 
48
  if type(model_cfg) is dict:
49
  self.md = model_cfg # model dict
50
  else: # is *.yaml
51
+ import yaml # for torch hub
52
  with open(model_cfg) as f:
53
  self.md = yaml.load(f, Loader=yaml.FullLoader) # model dict
54
 
55
  # Define model
56
+ if nc and nc != self.md['nc']:
57
+ print('Overriding %s nc=%g with nc=%g' % (model_cfg, self.md['nc'], nc))
58
  self.md['nc'] = nc # override yaml value
59
  self.model, self.save = parse_model(self.md, ch=[ch]) # model, savelist, ch_out
60
  # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
61
 
62
  # Build strides, anchors
63
  m = self.model[-1] # Detect()
64
+ if isinstance(m, Detect):
65
+ s = 128 # 2x min stride
66
+ m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
67
+ m.anchors /= m.stride.view(-1, 1, 1)
68
+ check_anchor_order(m)
69
+ self.stride = m.stride
70
+ self._initialize_biases() # only run once
71
+ # print('Strides: %s' % m.stride.tolist())
72
 
73
  # Init weights, biases
74
  torch_utils.initialize_weights(self)
 
142
  # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
143
 
144
  def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
145
+ print('Fusing layers... ', end='')
146
  for m in self.model.modules():
147
  if type(m) is Conv:
148
  m.conv = torch_utils.fuse_conv_and_bn(m.conv, m.bn) # update conv
149
  m.bn = None # remove batchnorm
150
  m.forward = m.fuseforward # update forward
151
  torch_utils.model_info(self)
152
+ return self
153
 
154
  def parse_model(md, ch): # model_dict, input_channels(3)
155
+ print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
156
  anchors, nc, gd, gw = md['anchors'], md['nc'], md['depth_multiple'], md['width_multiple']
157
  na = (len(anchors[0]) // 2) # number of anchors
158
  no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
 
167
  pass
168
 
169
  n = max(round(n * gd), 1) if n > 1 else n # depth gain
170
+ if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
171
  c1, c2 = ch[f], args[0]
172
 
173
  # Normal
 
188
  # c2 = make_divisible(c2, 8) if c2 != no else c2
189
 
190
  args = [c1, c2, *args[1:]]
191
+ if m in [BottleneckCSP, C3]:
192
  args.insert(2, n)
193
  n = 1
194
  elif m is nn.BatchNorm2d:
 
204
  t = str(m)[8:-2].replace('__main__.', '') # module type
205
  np = sum([x.numel() for x in m_.parameters()]) # number params
206
  m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
207
+ print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
208
  save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
209
  layers.append(m_)
210
  ch.append(c2)
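
The stride computation above runs one dummy 128x128 forward pass and derives a stride per detection layer from the output grid sizes. As an illustration, assuming the usual three YOLOv5 heads producing 16x16, 8x8 and 4x4 grids:

    s = 128
    strides = [s / g for g in (16, 8, 4)]  # -> [8.0, 16.0, 32.0]
    # pixel-space anchors are then rescaled to grid units: m.anchors /= m.stride.view(-1, 1, 1)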
test.py CHANGED
@@ -26,6 +26,7 @@ def test(data,
26
  # Initialize/load model and set device
27
  if model is None:
28
  training = False
 
29
  device = torch_utils.select_device(opt.device, batch_size=batch_size)
30
 
31
  # Remove previous
@@ -34,10 +35,8 @@ def test(data,
34
 
35
  # Load model
36
  google_utils.attempt_download(weights)
37
- model = torch.load(weights, map_location=device)['model'].float() # load to FP32
38
- torch_utils.model_info(model)
39
- model.fuse()
40
- model.to(device)
41
 
42
  # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
43
  # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -62,7 +61,6 @@ def test(data,
62
 
63
  # Dataloader
64
  if dataloader is None: # not training
65
- merge = opt.merge # use Merge NMS
66
  img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
67
  _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
68
  path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
@@ -246,7 +244,6 @@ if __name__ == '__main__':
246
  parser.add_argument('--merge', action='store_true', help='use Merge NMS')
247
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
248
  opt = parser.parse_args()
249
- opt.img_size = check_img_size(opt.img_size)
250
  opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
251
  opt.data = check_file(opt.data) # check file
252
  print(opt)
 
26
  # Initialize/load model and set device
27
  if model is None:
28
  training = False
29
+ merge = opt.merge # use Merge NMS
30
  device = torch_utils.select_device(opt.device, batch_size=batch_size)
31
 
32
  # Remove previous
 
35
 
36
  # Load model
37
  google_utils.attempt_download(weights)
38
+ model = torch.load(weights, map_location=device)['model'].float().fuse().to(device) # load to FP32
39
+ imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
 
 
40
 
41
  # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
42
  # if device.type != 'cpu' and torch.cuda.device_count() > 1:
 
61
 
62
  # Dataloader
63
  if dataloader is None: # not training
 
64
  img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
65
  _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
66
  path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
 
244
  parser.add_argument('--merge', action='store_true', help='use Merge NMS')
245
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
246
  opt = parser.parse_args()
 
247
  opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
248
  opt.data = check_file(opt.data) # check file
249
  print(opt)
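
Image size is now validated against the loaded model's maximum stride instead of a fixed value. Roughly, the requested size is rounded up to the nearest stride multiple (a paraphrase of check_img_size, not its exact source):

    import math

    def round_to_stride(imgsz, stride=32):  # paraphrased behaviour
        return int(math.ceil(imgsz / stride) * stride)

    print(round_to_stride(636))  # 640
    print(round_to_stride(640))  # 640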
train.py CHANGED
@@ -72,9 +72,7 @@ def train(hyp):
72
  os.remove(f)
73
 
74
  # Create model
75
- model = Model(opt.cfg).to(device)
76
- assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
77
- model.names = data_dict['names']
78
 
79
  # Image sizes
80
  gs = int(max(model.stride)) # grid size (max stride)
@@ -101,6 +99,9 @@ def train(hyp):
101
 
102
  optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
103
  optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
 
 
 
104
  print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
105
  del pg0, pg1, pg2
106
 
@@ -116,8 +117,9 @@ def train(hyp):
116
  if model.state_dict()[k].shape == v.shape} # to FP32, filter
117
  model.load_state_dict(ckpt['model'], strict=False)
118
  except KeyError as e:
119
- s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s." \
120
- % (opt.weights, opt.cfg, opt.weights)
 
121
  raise KeyError(s) from e
122
 
123
  # load optimizer
@@ -130,16 +132,20 @@ def train(hyp):
130
  with open(results_file, 'w') as file:
131
  file.write(ckpt['training_results']) # write results.txt
132
 
 
133
  start_epoch = ckpt['epoch'] + 1
 
 
 
 
 
134
  del ckpt
135
 
136
  # Mixed precision training https://github.com/NVIDIA/apex
137
  if mixed_precision:
138
  model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
139
 
140
- # Scheduler https://arxiv.org/pdf/1812.01187.pdf
141
- lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine
142
- scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
143
  scheduler.last_epoch = start_epoch - 1 # do not move
144
  # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
145
  plot_lr_scheduler(optimizer, scheduler, epochs, save_dir = log_dir)
@@ -161,7 +167,7 @@ def train(hyp):
161
 
162
  # Testloader
163
  testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
164
- hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
165
 
166
  # Model parameters
167
  hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
@@ -169,6 +175,7 @@ def train(hyp):
169
  model.hyp = hyp # attach hyperparameters to model
170
  model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
171
  model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
 
172
 
173
  #save hyperparamter and training options in run folder
174
  with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
@@ -216,6 +223,10 @@ def train(hyp):
216
  image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
217
  dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx
218
 
 
 
 
 
219
  mloss = torch.zeros(4, device=device) # mean losses
220
  print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
221
  pbar = tqdm(enumerate(dataloader), total=nb) # progress bar
@@ -323,7 +334,7 @@ def train(hyp):
323
  ckpt = {'epoch': epoch,
324
  'best_fitness': best_fitness,
325
  'training_results': f.read(),
326
- 'model': ema.ema.module if hasattr(model, 'module') else ema.ema,
327
  'optimizer': None if final_epoch else optimizer.state_dict()}
328
 
329
  # Save last, best and delete
@@ -335,17 +346,17 @@ def train(hyp):
335
  # end epoch ----------------------------------------------------------------------------------------------------
336
  # end training
337
 
338
- n = opt.name
339
- if len(n):
340
- n = '_' + n if not n.isnumeric() else n
341
- fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
342
- for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', wdir + 'results.txt'], [flast, fbest, fresults]):
343
- if os.path.exists(f1):
344
- os.rename(f1, f2) # rename
345
- ispt = f2.endswith('.pt') # is *.pt
346
- strip_optimizer(f2) if ispt else None # strip optimizer
347
- os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload
348
-
349
  if not opt.evolve:
350
  plot_results(save_dir = log_dir) # save as results.png
351
  print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
@@ -364,6 +375,7 @@ if __name__ == '__main__':
364
  parser.add_argument('--batch-size', type=int, default=16)
365
  parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
366
  parser.add_argument('--rect', action='store_true', help='rectangular training')
 
367
  parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
368
  parser.add_argument('--notest', action='store_true', help='only test final epoch')
369
  parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
@@ -378,6 +390,7 @@ if __name__ == '__main__':
378
 
379
  opt = parser.parse_args()
380
 
 
381
  opt.cfg = check_file(opt.cfg) # check file
382
  opt.data = check_file(opt.data) # check file
383
  opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file
 
72
  os.remove(f)
73
 
74
  # Create model
75
+ model = Model(opt.cfg, nc=data_dict['nc']).to(device)
 
 
76
 
77
  # Image sizes
78
  gs = int(max(model.stride)) # grid size (max stride)
 
99
 
100
  optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
101
  optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
102
+ # Scheduler https://arxiv.org/pdf/1812.01187.pdf
103
+ lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine
104
+ scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
105
  print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
106
  del pg0, pg1, pg2
107
 
 
117
  if model.state_dict()[k].shape == v.shape} # to FP32, filter
118
  model.load_state_dict(ckpt['model'], strict=False)
119
  except KeyError as e:
120
+ s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
121
+ "Please delete or update %s and try again, or use --weights '' to train from scratch." \
122
+ % (opt.weights, opt.cfg, opt.weights, opt.weights)
123
  raise KeyError(s) from e
124
 
125
  # load optimizer
 
132
  with open(results_file, 'w') as file:
133
  file.write(ckpt['training_results']) # write results.txt
134
 
135
+ # epochs
136
  start_epoch = ckpt['epoch'] + 1
137
+ if epochs < start_epoch:
138
+ print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
139
+ (opt.weights, ckpt['epoch'], epochs))
140
+ epochs += ckpt['epoch'] # finetune additional epochs
141
+
142
  del ckpt
143
 
144
  # Mixed precision training https://github.com/NVIDIA/apex
145
  if mixed_precision:
146
  model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
147
 
148
+
 
 
149
  scheduler.last_epoch = start_epoch - 1 # do not move
150
  # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
151
  plot_lr_scheduler(optimizer, scheduler, epochs, save_dir = log_dir)
 
167
 
168
  # Testloader
169
  testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
170
+ hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
171
 
172
  # Model parameters
173
  hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
 
175
  model.hyp = hyp # attach hyperparameters to model
176
  model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
177
  model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
178
+ model.names = data_dict['names']
179
 
180
  #save hyperparamter and training options in run folder
181
  with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
 
223
  image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
224
  dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx
225
 
226
+ # Update mosaic border
227
+ # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
228
+ # dataset.mosaic_border = [b - imgsz, -b] # height, width borders
229
+
230
  mloss = torch.zeros(4, device=device) # mean losses
231
  print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
232
  pbar = tqdm(enumerate(dataloader), total=nb) # progress bar
 
334
  ckpt = {'epoch': epoch,
335
  'best_fitness': best_fitness,
336
  'training_results': f.read(),
337
+ 'model': ema.ema,
338
  'optimizer': None if final_epoch else optimizer.state_dict()}
339
 
340
  # Save last, best and delete
 
346
  # end epoch ----------------------------------------------------------------------------------------------------
347
  # end training
348
 
349
+ # Strip optimizers
350
+ n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
351
+ fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
352
+ for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
353
+ if os.path.exists(f1):
354
+ os.rename(f1, f2) # rename
355
+ ispt = f2.endswith('.pt') # is *.pt
356
+ strip_optimizer(f2) if ispt else None # strip optimizer
357
+ os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload
358
+
359
+ # Finish
360
  if not opt.evolve:
361
  plot_results(save_dir = log_dir) # save as results.png
362
  print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
 
375
  parser.add_argument('--batch-size', type=int, default=16)
376
  parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
377
  parser.add_argument('--rect', action='store_true', help='rectangular training')
378
+ parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
379
  parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
380
  parser.add_argument('--notest', action='store_true', help='only test final epoch')
381
  parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
 
390
 
391
  opt = parser.parse_args()
392
 
393
+ opt.weights = last if opt.resume and not opt.weights else opt.weights
394
  opt.cfg = check_file(opt.cfg) # check file
395
  opt.data = check_file(opt.data) # check file
396
  opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file
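
With the new epoch bookkeeping, a --epochs value smaller than the checkpoint's trained epoch count is treated as additional fine-tuning epochs rather than an error. A worked example of the arithmetic in the diff (numbers are illustrative):

    ckpt_epoch = 299              # checkpoint saved at epoch 299
    start_epoch = ckpt_epoch + 1  # resume at epoch 300
    epochs = 100                  # requested on the command line
    if epochs < start_epoch:
        epochs += ckpt_epoch      # -> 399: training continues from epoch 300 toward 399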
utils/datasets.py CHANGED
@@ -62,7 +62,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
62
 
63
 
64
  class LoadImages: # for inference
65
- def __init__(self, path, img_size=416):
66
  path = str(Path(path)) # os-agnostic
67
  files = []
68
  if os.path.isdir(path):
@@ -139,7 +139,7 @@ class LoadImages: # for inference
139
 
140
 
141
  class LoadWebcam: # for inference
142
- def __init__(self, pipe=0, img_size=416):
143
  self.img_size = img_size
144
 
145
  if pipe == '0':
@@ -204,7 +204,7 @@ class LoadWebcam: # for inference
204
 
205
 
206
  class LoadStreams: # multiple IP or RTSP cameras
207
- def __init__(self, sources='streams.txt', img_size=416):
208
  self.mode = 'images'
209
  self.img_size = img_size
210
 
@@ -277,7 +277,7 @@ class LoadStreams: # multiple IP or RTSP cameras
277
 
278
 
279
  class LoadImagesAndLabels(Dataset): # for training/testing
280
- def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
281
  cache_images=False, single_cls=False, stride=32, pad=0.0):
282
  try:
283
  path = str(Path(path)) # os-agnostic
@@ -307,6 +307,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
307
  self.image_weights = image_weights
308
  self.rect = False if image_weights else rect
309
  self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
 
 
310
 
311
  # Define labels
312
  self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
@@ -585,7 +587,7 @@ def load_mosaic(self, index):
585
 
586
  labels4 = []
587
  s = self.img_size
588
- xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y
589
  indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
590
  for i, index in enumerate(indices):
591
  # Load image
@@ -626,6 +628,9 @@ def load_mosaic(self, index):
626
  # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
627
  np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
628
 
 
 
 
629
  # Augment
630
  # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
631
  img4, labels4 = random_affine(img4, labels4,
@@ -633,12 +638,29 @@ def load_mosaic(self, index):
633
  translate=self.hyp['translate'],
634
  scale=self.hyp['scale'],
635
  shear=self.hyp['shear'],
636
- border=-s // 2) # border to remove
637
 
638
  return img4, labels4
639
 
640
 
641
- def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
642
  # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
643
  shape = img.shape[:2] # current shape [height, width]
644
  if isinstance(new_shape, int):
@@ -671,13 +693,13 @@ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scale
671
  return img, ratio, (dw, dh)
672
 
673
 
674
- def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
675
  # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
676
  # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
677
  # targets = [cls, xyxy]
678
 
679
- height = img.shape[0] + border * 2
680
- width = img.shape[1] + border * 2
681
 
682
  # Rotation and Scale
683
  R = np.eye(3)
@@ -689,8 +711,8 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
689
 
690
  # Translation
691
  T = np.eye(3)
692
- T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels)
693
- T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels)
694
 
695
  # Shear
696
  S = np.eye(3)
@@ -699,7 +721,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
699
 
700
  # Combined rotation matrix
701
  M = S @ T @ R # ORDER IS IMPORTANT HERE!!
702
- if (border != 0) or (M != np.eye(3)).any(): # image changed
703
  img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
704
 
705
  # Transform label coordinates
@@ -762,6 +784,7 @@ def cutout(image, labels):
762
  box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
763
 
764
  # Intersection over box2 area
 
765
  return inter_area / box2_area
766
 
767
  # create random masks
 
62
 
63
 
64
  class LoadImages: # for inference
65
+ def __init__(self, path, img_size=640):
66
  path = str(Path(path)) # os-agnostic
67
  files = []
68
  if os.path.isdir(path):
 
139
 
140
 
141
  class LoadWebcam: # for inference
142
+ def __init__(self, pipe=0, img_size=640):
143
  self.img_size = img_size
144
 
145
  if pipe == '0':
 
204
 
205
 
206
  class LoadStreams: # multiple IP or RTSP cameras
207
+ def __init__(self, sources='streams.txt', img_size=640):
208
  self.mode = 'images'
209
  self.img_size = img_size
210
 
 
277
 
278
 
279
  class LoadImagesAndLabels(Dataset): # for training/testing
280
+ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
281
  cache_images=False, single_cls=False, stride=32, pad=0.0):
282
  try:
283
  path = str(Path(path)) # os-agnostic
 
307
  self.image_weights = image_weights
308
  self.rect = False if image_weights else rect
309
  self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
310
+ self.mosaic_border = [-img_size // 2, -img_size // 2]
311
+ self.stride = stride
312
 
313
  # Define labels
314
  self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
 
587
 
588
  labels4 = []
589
  s = self.img_size
590
+ yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
591
  indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
592
  for i, index in enumerate(indices):
593
  # Load image
 
628
  # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
629
  np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
630
 
631
+ # Replicate
632
+ # img4, labels4 = replicate(img4, labels4)
633
+
634
  # Augment
635
  # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
636
  img4, labels4 = random_affine(img4, labels4,
 
638
  translate=self.hyp['translate'],
639
  scale=self.hyp['scale'],
640
  shear=self.hyp['shear'],
641
+ border=self.mosaic_border) # border to remove
642
 
643
  return img4, labels4
644
 
645
 
646
+ def replicate(img, labels):
647
+ # Replicate labels
648
+ h, w = img.shape[:2]
649
+ boxes = labels[:, 1:].astype(int)
650
+ x1, y1, x2, y2 = boxes.T
651
+ s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
652
+ for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
653
+ x1b, y1b, x2b, y2b = boxes[i]
654
+ bh, bw = y2b - y1b, x2b - x1b
655
+ yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
656
+ x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
657
+ img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
658
+ labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
659
+
660
+ return img, labels
661
+
662
+
663
+ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
664
  # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
665
  shape = img.shape[:2] # current shape [height, width]
666
  if isinstance(new_shape, int):
 
693
  return img, ratio, (dw, dh)
694
 
695
 
696
+ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
697
  # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
698
  # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
699
  # targets = [cls, xyxy]
700
 
701
+ height = img.shape[0] + border[0] * 2 # shape(h,w,c)
702
+ width = img.shape[1] + border[1] * 2
703
 
704
  # Rotation and Scale
705
  R = np.eye(3)
 
711
 
712
  # Translation
713
  T = np.eye(3)
714
+ T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1] # x translation (pixels)
715
+ T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0] # y translation (pixels)
716
 
717
  # Shear
718
  S = np.eye(3)
 
721
 
722
  # Combined rotation matrix
723
  M = S @ T @ R # ORDER IS IMPORTANT HERE!!
724
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
725
  img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
726
 
727
  # Transform label coordinates
 
784
  box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
785
 
786
  # Intersection over box2 area
787
+
788
  return inter_area / box2_area
789
 
790
  # create random masks
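
With mosaic_border = [-img_size // 2, -img_size // 2], the new mosaic-centre sampling reproduces the old 0.5*s .. 1.5*s range while keeping the border configurable. For example, at the default 640 image size:

    import random

    s = 640
    mosaic_border = [-s // 2, -s // 2]  # [-320, -320]
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]
    # both coordinates are drawn from uniform(320, 960), i.e. 0.5*s .. 1.5*s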
utils/torch_utils.py CHANGED
@@ -54,6 +54,11 @@ def time_synchronized():
54
  return time.time()
55
 
56
 
 
 
 
 
 
57
  def initialize_weights(model):
58
  for m in model.modules():
59
  t = type(m)
@@ -71,16 +76,36 @@ def find_modules(model, mclass=nn.Conv2d):
71
  return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
72
 
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  def fuse_conv_and_bn(conv, bn):
75
  # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
76
  with torch.no_grad():
77
  # init
78
- fusedconv = torch.nn.Conv2d(conv.in_channels,
79
- conv.out_channels,
80
- kernel_size=conv.kernel_size,
81
- stride=conv.stride,
82
- padding=conv.padding,
83
- bias=True)
84
 
85
  # prepare filters
86
  w_conv = conv.weight.clone().view(conv.out_channels, -1)
@@ -88,10 +113,7 @@ def fuse_conv_and_bn(conv, bn):
88
  fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
89
 
90
  # prepare spatial bias
91
- if conv.bias is not None:
92
- b_conv = conv.bias
93
- else:
94
- b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device)
95
  b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
96
  fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
97
 
@@ -111,8 +133,8 @@ def model_info(model, verbose=False):
111
 
112
  try: # FLOPS
113
  from thop import profile
114
- macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
115
- fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)
116
  except:
117
  fs = ''
118
 
@@ -134,8 +156,8 @@ def load_classifier(name='resnet101', n=2):
134
 
135
  # Reshape output to n classes
136
  filters = model.fc.weight.shape[1]
137
- model.fc.bias = torch.nn.Parameter(torch.zeros(n), requires_grad=True)
138
- model.fc.weight = torch.nn.Parameter(torch.zeros(n, filters), requires_grad=True)
139
  model.fc.out_features = n
140
  return model
141
 
@@ -170,33 +192,31 @@ class ModelEMA:
170
  """
171
 
172
  def __init__(self, model, decay=0.9999, device=''):
173
- # make a copy of the model for accumulating moving average of weights
174
- self.ema = deepcopy(model)
175
  self.ema.eval()
176
  self.updates = 0 # number of EMA updates
177
  self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
178
  self.device = device # perform ema on different device from model if set
179
  if device:
180
- self.ema.to(device=device)
181
  for p in self.ema.parameters():
182
  p.requires_grad_(False)
183
 
184
  def update(self, model):
185
- self.updates += 1
186
- d = self.decay(self.updates)
187
  with torch.no_grad():
188
- if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
189
- msd, esd = model.module.state_dict(), self.ema.module.state_dict()
190
- else:
191
- msd, esd = model.state_dict(), self.ema.state_dict()
192
 
193
- for k, v in esd.items():
 
194
  if v.dtype.is_floating_point:
195
  v *= d
196
  v += (1. - d) * msd[k].detach()
197
 
198
  def update_attr(self, model):
199
- # Assign attributes (which may change during training)
200
- for k in model.__dict__.keys():
201
- if not k.startswith('_'):
202
- setattr(self.ema, k, getattr(model, k))
 
54
  return time.time()
55
 
56
 
57
+ def is_parallel(model):
58
+ # Returns True if the model is wrapped in DataParallel (DP) or DistributedDataParallel (DDP)
59
+ return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
60
+
61
+
62
  def initialize_weights(model):
63
  for m in model.modules():
64
  t = type(m)
 
76
  return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
77
 
78
 
79
+ def sparsity(model):
80
+ # Return global model sparsity
81
+ a, b = 0., 0.
82
+ for p in model.parameters():
83
+ a += p.numel()
84
+ b += (p == 0).sum()
85
+ return b / a
86
+
87
+
88
+ def prune(model, amount=0.3):
89
+ # Prune model to requested global sparsity
90
+ import torch.nn.utils.prune as prune
91
+ print('Pruning model... ', end='')
92
+ for name, m in model.named_modules():
93
+ if isinstance(m, nn.Conv2d):
94
+ prune.l1_unstructured(m, name='weight', amount=amount) # prune
95
+ prune.remove(m, 'weight') # make permanent
96
+ print(' %.3g global sparsity' % sparsity(model))
97
+
98
+
99
  def fuse_conv_and_bn(conv, bn):
100
  # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
101
  with torch.no_grad():
102
  # init
103
+ fusedconv = nn.Conv2d(conv.in_channels,
104
+ conv.out_channels,
105
+ kernel_size=conv.kernel_size,
106
+ stride=conv.stride,
107
+ padding=conv.padding,
108
+ bias=True).to(conv.weight.device)
109
 
110
  # prepare filters
111
  w_conv = conv.weight.clone().view(conv.out_channels, -1)
 
113
  fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
114
 
115
  # prepare spatial bias
116
+ b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
 
 
 
117
  b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
118
  fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
119
 
 
133
 
134
  try: # FLOPS
135
  from thop import profile
136
+ flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
137
+ fs = ', %.1f GFLOPS' % (flops * 100) # 640x640 GFLOPS (64x64 profile scaled by 100x area)
138
  except:
139
  fs = ''
140
 
 
156
 
157
  # Reshape output to n classes
158
  filters = model.fc.weight.shape[1]
159
+ model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
160
+ model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
161
  model.fc.out_features = n
162
  return model
163
 
 
192
  """
193
 
194
  def __init__(self, model, decay=0.9999, device=''):
195
+ # Create EMA
196
+ self.ema = deepcopy(model.module if is_parallel(model) else model) # FP32 EMA
197
  self.ema.eval()
198
  self.updates = 0 # number of EMA updates
199
  self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
200
  self.device = device # perform ema on different device from model if set
201
  if device:
202
+ self.ema.to(device)
203
  for p in self.ema.parameters():
204
  p.requires_grad_(False)
205
 
206
  def update(self, model):
207
+ # Update EMA parameters
 
208
  with torch.no_grad():
209
+ self.updates += 1
210
+ d = self.decay(self.updates)
 
 
211
 
212
+ msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
213
+ for k, v in self.ema.state_dict().items():
214
  if v.dtype.is_floating_point:
215
  v *= d
216
  v += (1. - d) * msd[k].detach()
217
 
218
  def update_attr(self, model):
219
+ # Update EMA attributes
220
+ for k, v in model.__dict__.items():
221
+ if not k.startswith('_') and k not in ["process_group", "reducer"]:
222
+ setattr(self.ema, k, v)
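
Taken together, the utils/torch_utils.py changes add is_parallel, sparsity and prune helpers and rebuild ModelEMA around is_parallel. A minimal usage sketch, assuming the repo's utils.torch_utils import path; the toy model and training loop are illustrative only:

import torch
import torch.nn as nn
from utils.torch_utils import ModelEMA, is_parallel, prune, sparsity

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))  # toy stand-in for the detector
ema = ModelEMA(model)                 # deep-copies model.module when DP/DDP-wrapped, else model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for _ in range(3):                    # illustrative training steps
    loss = model(torch.zeros(2, 3, 64, 64)).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    ema.update(model)                 # EMA weights track the live model

print(is_parallel(model))             # False: a plain nn.Module, not DP/DDP
prune(model, amount=0.3)              # L1-unstructured pruning of each Conv2d, made permanent
print(float(sparsity(model)))         # global fraction of zeroed parameters
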
utils/utils.py CHANGED
@@ -53,7 +53,7 @@ def check_git_status():
53
 
54
  def check_img_size(img_size, s=32):
55
  # Verify img_size is a multiple of stride s
56
- new_size = make_divisible(img_size, s) # ceil gs-multiple
57
  if new_size != img_size:
58
  print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
59
  return new_size
@@ -443,7 +443,9 @@ def compute_loss(p, targets, model): # predictions, targets, model
443
  BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
444
 
445
  # per output
446
- nt = 0 # targets
 
 
447
  for i, pi in enumerate(p): # layer index, layer predictions
448
  b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
449
  tobj = torch.zeros_like(pi[..., 0]) # target obj
@@ -473,11 +475,12 @@ def compute_loss(p, targets, model): # predictions, targets, model
473
  # with open('targets.txt', 'a') as file:
474
  # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
475
 
476
- lobj += BCEobj(pi[..., 4], tobj) # obj loss
477
 
478
- lbox *= h['giou']
479
- lobj *= h['obj']
480
- lcls *= h['cls']
 
481
  bs = tobj.shape[0] # batch size
482
  if red == 'sum':
483
  g = 3.0 # loss gain
@@ -514,16 +517,14 @@ def build_targets(p, targets, model):
514
  a, t = at[j], t.repeat(na, 1, 1)[j] # filter
515
 
516
  # overlaps
 
517
  gxy = t[:, 2:4] # grid xy
518
  z = torch.zeros_like(gxy)
519
  if style == 'rect2':
520
- g = 0.2 # offset
521
  j, k = ((gxy % 1. < g) & (gxy > 1.)).T
522
  a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
523
  offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
524
-
525
  elif style == 'rect4':
526
- g = 0.5 # offset
527
  j, k = ((gxy % 1. < g) & (gxy > 1.)).T
528
  l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
529
  a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
@@ -770,11 +771,11 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
770
  wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
771
 
772
  # Filter
773
- i = (wh0 < 4.0).any(1).sum()
774
  if i:
775
  print('WARNING: Extremely small objects found. '
776
- '%g of %g labels are < 4 pixels in width or height.' % (i, len(wh0)))
777
- wh = wh0[(wh0 >= 4.0).any(1)] # filter > 2 pixels
778
 
779
  # Kmeans calculation
780
  from scipy.cluster.vq import kmeans
 
53
 
54
  def check_img_size(img_size, s=32):
55
  # Verify img_size is a multiple of stride s
56
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
57
  if new_size != img_size:
58
  print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
59
  return new_size
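
The only change here is casting the stride to int before make_divisible; s often arrives as model.stride.max(), i.e. a float tensor. A quick worked example, with make_divisible assumed to be the ceil-based helper the '# ceil gs-multiple' comment refers to:

import math

def make_divisible(x, divisor):
    # round up to the nearest multiple of divisor (assumed to match the repo's helper)
    return math.ceil(x / divisor) * divisor

print(make_divisible(641, int(32.0)))  # 672: check_img_size warns and returns this
print(make_divisible(640, int(32.0)))  # 640: already a multiple, returned unchanged
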
 
443
  BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
444
 
445
  # per output
446
+ nt = 0 # number of targets
447
+ np = len(p) # number of outputs
448
+ balance = [1.0, 1.0, 1.0] # per-output-layer obj loss balance
449
  for i, pi in enumerate(p): # layer index, layer predictions
450
  b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
451
  tobj = torch.zeros_like(pi[..., 0]) # target obj
 
475
  # with open('targets.txt', 'a') as file:
476
  # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
477
 
478
+ lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
479
 
480
+ s = 3 / np # output count scaling
481
+ lbox *= h['giou'] * s
482
+ lobj *= h['obj'] * s
483
+ lcls *= h['cls'] * s
484
  bs = tobj.shape[0] # batch size
485
  if red == 'sum':
486
  g = 3.0 # loss gain
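
The new balance list and the s = 3 / np factor keep the total loss magnitude comparable if the number of output layers ever differs from three. A small worked example (layer counts other than 3 are hypothetical):

for num_outputs in (3, 4, 5):   # the standard 3-layer head vs. hypothetical larger heads
    s = 3 / num_outputs         # 1.0, 0.75, 0.6
    print(num_outputs, s)
# with the usual 3 output layers s == 1.0, so the existing hyp['giou'], hyp['obj'] and
# hyp['cls'] gains behave exactly as before; extra layers scale them down proportionally
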
 
517
  a, t = at[j], t.repeat(na, 1, 1)[j] # filter
518
 
519
  # overlaps
520
+ g = 0.5 # offset
521
  gxy = t[:, 2:4] # grid xy
522
  z = torch.zeros_like(gxy)
523
  if style == 'rect2':
 
524
  j, k = ((gxy % 1. < g) & (gxy > 1.)).T
525
  a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
526
  offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
 
527
  elif style == 'rect4':
 
528
  j, k = ((gxy % 1. < g) & (gxy > 1.)).T
529
  l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
530
  a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
 
771
  wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
772
 
773
  # Filter
774
+ i = (wh0 < 3.0).any(1).sum()
775
  if i:
776
  print('WARNING: Extremely small objects found. '
777
+ '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
778
+ wh = wh0[(wh0 >= 2.0).any(1)] # keep boxes with at least one side >= 2 pixels
779
 
780
  # Kmeans calculation
781
  from scipy.cluster.vq import kmeans
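
A short illustration of the relaxed small-object filter above; the label sizes are toy values chosen only to exercise both thresholds:

import numpy as np

wh0 = np.array([[2.5, 10.0], [1.0, 1.5], [8.0, 6.0]])  # toy label widths/heights in pixels
i = (wh0 < 3.0).any(1).sum()   # 2 labels have a side < 3 px, so the warning is printed
wh = wh0[(wh0 >= 2.0).any(1)]  # keep boxes with at least one side >= 2 px
print(i, wh)                   # drops only the 1.0 x 1.5 box
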