Merge branch 'master' into advanced_logging
- .dockerignore +2 -0
- .gitignore +1 -0
- README.md +6 -2
- data/get_coco2017.sh +7 -3
- data/get_voc.sh +214 -0
- data/voc.yaml +18 -0
- detect.py +12 -14
- models/common.py +12 -7
- models/experimental.py +47 -11
- models/export.py +72 -0
- models/onnx_export.py +0 -42
- models/yolo.py +17 -11
- test.py +3 -6
- train.py +34 -21
- utils/datasets.py +36 -13
- utils/torch_utils.py +48 -28
- utils/utils.py +13 -12
.dockerignore CHANGED
@@ -14,8 +14,10 @@ data/samples/*
 # Neural Network weights -----------------------------------------------------------------------------------------------
 **/*.weights
 **/*.pt
+**/*.pth
 **/*.onnx
 **/*.mlmodel
+**/*.torchscript
 
 
 # Below Copied From .gitignore -----------------------------------------------------------------------------------------
.gitignore CHANGED
@@ -50,6 +50,7 @@ gcp_test*.sh
 *.pt
 *.onnx
 *.mlmodel
+*.torchscript
 darknet53.conv.74
 yolov3-tiny.conv.15
 
README.md CHANGED
@@ -41,9 +41,13 @@ $ pip install -U -r requirements.txt
 ## Tutorials
 
 * [Notebook](https://github.com/ultralytics/yolov5/blob/master/tutorial.ipynb) <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
+* [Kaggle](https://www.kaggle.com/ultralytics/yolov5-tutorial)
 * [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)
-* [
-* [
+* [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)
+* [ONNX and TorchScript Export](https://github.com/ultralytics/yolov5/issues/251)
+* [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303)
+* [Google Cloud Quickstart](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
+* [Docker Quickstart](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart)
 
 
 ## Inference
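The new PyTorch Hub tutorial link is typically exercised with a snippet along these lines. This is a rough sketch only; the exact hubconf entry-point name and keyword arguments are assumptions here, and the linked issue #36 is the authoritative reference.

    import torch

    # Load a pretrained YOLOv5s model via PyTorch Hub (entry-point name and kwargs assumed; see issue #36)
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
    model = model.eval()  # switch to inference mode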
data/get_coco2017.sh CHANGED
@@ -1,7 +1,11 @@
 #!/bin/bash
-#
-#
-#
+# COCO 2017 dataset http://cocodataset.org
+# Download command: bash yolov5/data/get_coco2017.sh
+# Train command: python train.py --data ./data/coco.yaml
+# Dataset should be placed next to yolov5 folder:
+#   /parent_folder
+#     /coco
+#     /yolov5
 
 # Download labels from Google Drive, accepting presented query
 filename="coco2017labels.zip"
data/get_voc.sh ADDED
@@ -0,0 +1,214 @@
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Download command: bash ./data/get_voc.sh
# Train command: python train.py --data voc.yaml
# Dataset should be placed next to yolov5 folder:
#   /parent_folder
#     /VOC
#     /yolov5

start=`date +%s`

# handle optional download dir
if [ -z "$1" ]
  then
    # navigate to ~/tmp
    echo "navigating to ../tmp/ ..."
    mkdir -p ../tmp
    cd ../tmp/
  else
    # check if is valid directory
    if [ ! -d $1 ]; then
        echo $1 "is not a valid directory"
        exit 0
    fi
    echo "navigating to" $1 "..."
    cd $1
fi

echo "Downloading VOC2007 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
echo "Downloading VOC2007 test data ..."
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
echo "Done downloading."

# Extract data
echo "Extracting trainval ..."
tar -xf VOCtrainval_06-Nov-2007.tar
echo "Extracting test ..."
tar -xf VOCtest_06-Nov-2007.tar
echo "removing tars ..."
rm VOCtrainval_06-Nov-2007.tar
rm VOCtest_06-Nov-2007.tar

end=`date +%s`
runtime=$((end-start))

echo "Completed in" $runtime "seconds"

start=`date +%s`

# handle optional download dir
if [ -z "$1" ]
  then
    # navigate to ~/tmp
    echo "navigating to ../tmp/ ..."
    mkdir -p ../tmp
    cd ../tmp/
  else
    # check if is valid directory
    if [ ! -d $1 ]; then
        echo $1 "is not a valid directory"
        exit 0
    fi
    echo "navigating to" $1 "..."
    cd $1
fi

echo "Downloading VOC2012 trainval ..."
# Download the data.
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
echo "Done downloading."


# Extract data
echo "Extracting trainval ..."
tar -xf VOCtrainval_11-May-2012.tar
echo "removing tar ..."
rm VOCtrainval_11-May-2012.tar

end=`date +%s`
runtime=$((end-start))

echo "Completed in" $runtime "seconds"

cd ../tmp
echo "Spliting dataset..."
python3 - "$@" <<END
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]


def convert(size, box):
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)

def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
    tree=ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()

for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
        os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt'%(year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()

END

cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt

python3 - "$@" <<END

import shutil
import os
os.system('mkdir ../VOC/')
os.system('mkdir ../VOC/images')
os.system('mkdir ../VOC/images/train')
os.system('mkdir ../VOC/images/val')

os.system('mkdir ../VOC/labels')
os.system('mkdir ../VOC/labels/train')
os.system('mkdir ../VOC/labels/val')

import os
print(os.path.exists('../tmp/train.txt'))
f = open('../tmp/train.txt', 'r')
lines = f.readlines()

for line in lines:
    #print(line.split('/')[-1][:-1])
    line = "/".join(line.split('/')[2:])
    #print(line)
    if (os.path.exists("../" + line[:-1])):
        os.system("cp ../"+ line[:-1] + " ../VOC/images/train")

print(os.path.exists('../tmp/train.txt'))
f = open('../tmp/train.txt', 'r')
lines = f.readlines()

for line in lines:
    #print(line.split('/')[-1][:-1])
    line = "/".join(line.split('/')[2:])
    line = line.replace('JPEGImages', 'labels')
    line = line.replace('jpg', 'txt')
    #print(line)
    if (os.path.exists("../" + line[:-1])):
        os.system("cp ../"+ line[:-1] + " ../VOC/labels/train")

print(os.path.exists('../tmp/2007_test.txt'))
f = open('../tmp/2007_test.txt', 'r')
lines = f.readlines()

for line in lines:
    #print(line.split('/')[-1][:-1])
    line = "/".join(line.split('/')[2:])

    if (os.path.exists("../" + line[:-1])):
        os.system("cp ../"+ line[:-1] + " ../VOC/images/val")

print(os.path.exists('../tmp/2007_test.txt'))
f = open('../tmp/2007_test.txt', 'r')
lines = f.readlines()

for line in lines:
    #print(line.split('/')[-1][:-1])
    line = "/".join(line.split('/')[2:])
    line = line.replace('JPEGImages', 'labels')
    line = line.replace('jpg', 'txt')
    #print(line)
    if (os.path.exists("../" + line[:-1])):
        os.system("cp ../"+ line[:-1] + " ../VOC/labels/val")

END

rm -rf ../tmp # remove temporary directory
echo "VOC download done."
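The convert() helper inside the heredoc maps VOC's corner-format boxes to YOLO's normalized center format. A small worked example of that arithmetic, using hypothetical numbers:

    # Worked example of the convert() arithmetic above (hypothetical 640x480 image).
    # VOC boxes are (xmin, xmax, ymin, ymax) in pixels; YOLO labels are
    # (x_center, y_center, width, height) normalized to [0, 1].
    size = (640, 480)                    # image (width, height)
    box = (100.0, 300.0, 50.0, 250.0)    # xmin, xmax, ymin, ymax

    x = (box[0] + box[1]) / 2.0 - 1      # 199.0 -> box center x in pixels
    y = (box[2] + box[3]) / 2.0 - 1      # 149.0 -> box center y in pixels
    w = box[1] - box[0]                  # 200.0
    h = box[3] - box[2]                  # 200.0
    print(x / size[0], y / size[1], w / size[0], h / size[1])
    # ~0.311 0.310 0.3125 0.417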
data/voc.yaml ADDED
@@ -0,0 +1,18 @@
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Download command: bash ./data/get_voc.sh
# Train command: python train.py --data voc.yaml
# Dataset should be placed next to yolov5 folder:
#   /parent_folder
#     /VOC
#     /yolov5

# train and val datasets (image directory or *.txt file with image paths)
train: ../VOC/images/train/
val: ../VOC/images/val/

# number of classes
nc: 20

# class names
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
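A dataset yaml like this is consumed at the start of training; a minimal sketch of how train.py reads it (mirroring the fields used in the train.py changes below):

    import yaml

    # Minimal sketch of how a dataset yaml such as data/voc.yaml is consumed.
    with open('data/voc.yaml') as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)

    train_path = data_dict['train']   # ../VOC/images/train/
    test_path = data_dict['val']      # ../VOC/images/val/
    nc = int(data_dict['nc'])         # 20 classes
    names = data_dict['names']        # class-name list
    assert len(names) == nc, 'class count and name list disagree'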
detect.py CHANGED
@@ -21,10 +21,8 @@ def detect(save_img=False):
 
     # Load model
     google_utils.attempt_download(weights)
-    model = torch.load(weights, map_location=device)['model'].float()  # load
-
-    # model.fuse()
-    model.to(device).eval()
+    model = torch.load(weights, map_location=device)['model'].float().eval()  # load FP32 model
+    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
     if half:
         model.half()  # to FP16
 
@@ -82,7 +80,7 @@ def detect(save_img=False):
             save_path = str(Path(out) / Path(p).name)
             txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
             s += '%gx%g ' % img.shape[2:]  # print string
-            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #
+            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
             if det is not None and len(det):
                 # Rescale boxes from img_size to im0 size
                 det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
@@ -122,10 +120,11 @@ def detect(save_img=False):
                     if isinstance(vid_writer, cv2.VideoWriter):
                         vid_writer.release()  # release previous video writer
 
+                    fourcc = 'mp4v'  # output video codec
                     fps = vid_cap.get(cv2.CAP_PROP_FPS)
                     w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                     h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*…
+                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                     vid_writer.write(im0)
 
             if save_txt or save_img:
@@ -144,21 +143,20 @@ if __name__ == '__main__':
     parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
     parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
-    parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--view-img', action='store_true', help='display results')
     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
     parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
     parser.add_argument('--augment', action='store_true', help='augmented inference')
+    parser.add_argument('--update', action='store_true', help='update all models')
     opt = parser.parse_args()
-    opt.img_size = check_img_size(opt.img_size)
     print(opt)
 
     with torch.no_grad():
-        …
+        if opt.update:  # update all models (to fix SourceChangeWarning)
+            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
+                detect()
+                create_pretrained(opt.weights, opt.weights)
+        else:
+            detect()
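The new "normalization gain whwh" tensor gn holds (width, height, width, height) of the original image, so box coordinates divided by it fall in [0, 1] when results are written to *.txt. In detect.py the boxes are first converted to xywh before dividing; this sketch only illustrates the indexing trick and the normalization, with hypothetical values:

    import torch

    # im0 is the original BGR image with shape (height, width, channels).
    im0_shape = (480, 640, 3)                      # hypothetical h, w, c
    gn = torch.tensor(im0_shape)[[1, 0, 1, 0]]     # tensor([640, 480, 640, 480]) -> w, h, w, h

    xyxy = torch.tensor([160., 120., 480., 360.])  # a detection in pixel coordinates
    print(xyxy / gn)                               # tensor([0.2500, 0.2500, 0.7500, 0.7500])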
models/common.py CHANGED
@@ -1,9 +1,15 @@
 # This file contains modules common to various models
 
-
 from utils.utils import *
 
 
+def autopad(k, p=None):  # kernel, padding
+    # Pad to 'same'
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    return p
+
+
 def DWConv(c1, c2, k=1, s=1, act=True):
     # Depthwise convolution
     return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
@@ -11,10 +17,9 @@ def DWConv(c1, c2, k=1, s=1, act=True):
 
 class Conv(nn.Module):
     # Standard convolution
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
         super(Conv, self).__init__()
-        …
-        self.conv = nn.Conv2d(c1, c2, k, s, p, groups=g, bias=False)
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
         self.bn = nn.BatchNorm2d(c2)
         self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()
 
@@ -46,7 +51,7 @@ class BottleneckCSP(nn.Module):
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
-        self.cv4 = Conv(…
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
         self.act = nn.LeakyReLU(0.1, inplace=True)
         self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
@@ -79,9 +84,9 @@ class Flatten(nn.Module):
 
 class Focus(nn.Module):
     # Focus wh information into c-space
-    def __init__(self, c1, c2, k=1):
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
         super(Focus, self).__init__()
-        self.conv = Conv(c1 * 4, c2, k, …
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
 
     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
         return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
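The new autopad() helper computes 'same'-style padding when none is given explicitly, which is what lets Conv and Focus accept an optional p argument. A standalone check of its behavior:

    # Standalone check of the autopad() behavior added above.
    def autopad(k, p=None):  # kernel, padding
        if p is None:
            p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
        return p

    print(autopad(3))        # 1 -> a 3x3 stride-1 conv keeps spatial size
    print(autopad(5))        # 2
    print(autopad((1, 3)))   # [0, 1] -> per-dimension padding for asymmetric kernels
    print(autopad(3, 0))     # 0 -> explicit padding is passed through unchanged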
models/experimental.py CHANGED
@@ -1,6 +1,41 @@
+# This file contains experimental modules
+
 from models.common import *
 
 
+class CrossConv(nn.Module):
+    # Cross Convolution Downsample
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+        super(CrossConv, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, (1, k), (1, s))
+        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class C3(nn.Module):
+    # Cross Convolution CSP
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(C3, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
 class Sum(nn.Module):
     # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
     def __init__(self, n, weight=False):  # n: number of inputs
@@ -50,17 +85,6 @@ class GhostBottleneck(nn.Module):
         return self.conv(x) + self.shortcut(x)
 
 
-class ConvPlus(nn.Module):
-    # Plus-shaped convolution
-    def __init__(self, c1, c2, k=3, s=1, g=1, bias=True):  # ch_in, ch_out, kernel, stride, groups
-        super(ConvPlus, self).__init__()
-        self.cv1 = nn.Conv2d(c1, c2, (k, 1), s, (k // 2, 0), groups=g, bias=bias)
-        self.cv2 = nn.Conv2d(c1, c2, (1, k), s, (0, k // 2), groups=g, bias=bias)
-
-    def forward(self, x):
-        return self.cv1(x) + self.cv2(x)
-
-
 class MixConv2d(nn.Module):
     # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
     def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
@@ -83,3 +107,15 @@ class MixConv2d(nn.Module):
 
     def forward(self, x):
         return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+
+
+class Ensemble(nn.ModuleList):
+    # Ensemble of models
+    def __init__(self):
+        super(Ensemble, self).__init__()
+
+    def forward(self, x, augment=False):
+        y = []
+        for module in self:
+            y.append(module(x, augment)[0])
+        return torch.cat(y, 1), None  # ensembled inference output, train output
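The new Ensemble container runs the same input through every appended model and concatenates their inference outputs along the box dimension. A minimal sketch of wiring it up, assuming two compatible FP32 checkpoints are available locally (weight file names are illustrative and loading details are simplified):

    import torch
    from models.experimental import Ensemble

    ensemble = Ensemble()
    for w in ['yolov5s.pt', 'yolov5m.pt']:  # hypothetical weight files
        ensemble.append(torch.load(w, map_location='cpu')['model'].float().eval())

    img = torch.zeros(1, 3, 640, 640)
    with torch.no_grad():
        pred, _ = ensemble(img)  # detections from all members, concatenated on dim 1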
models/export.py ADDED
@@ -0,0 +1,72 @@
"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats

Usage:
    $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""

import argparse

from models.common import *
from utils import google_utils

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input
    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    google_utils.attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model.eval()
    model.model[-1].export = True  # set Detect() layer export=True
    y = model(img)  # dry run

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript')  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        model.fuse()  # only for ONNX
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'])

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model
        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    # CoreML export
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape)])  # convert
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # Finish
    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
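After running the export command from the docstring, the TorchScript artifact can be reloaded directly. A minimal sketch, with an illustrative path; note that because the script sets export=True on the Detect() layer, the traced module returns raw per-layer outputs rather than decoded boxes:

    import torch

    # Reload the TorchScript file produced by models/export.py (path illustrative).
    ts = torch.jit.load('weights/yolov5s.torchscript', map_location='cpu')
    img = torch.zeros(1, 3, 640, 640)  # must match the --img-size used at export time
    with torch.no_grad():
        out = ts(img)                  # raw Detect() outputs (export=True skips the final decode)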
models/onnx_export.py DELETED
@@ -1,42 +0,0 @@
"""Exports a pytorch *.pt model to *.onnx format

Usage:
    $ export PYTHONPATH="$PWD" && python models/onnx_export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
"""

import argparse

import onnx

from models.common import *
from utils import google_utils

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    print(opt)

    # Parameters
    f = opt.weights.replace('.pt', '.onnx')  # onnx filename
    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size, (1, 3, 320, 192) iDetection

    # Load pytorch model
    google_utils.attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model.eval()
    model.fuse()

    # Export to onnx
    model.model[-1].export = True  # set Detect() layer export=True
    _ = model(img)  # dry run
    torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'],
                      output_names=['output'])  # output_names=['classes', 'boxes']

    # Check onnx model
    model = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model)  # check onnx model
    print(onnx.helper.printable_graph(model.graph))  # print a human readable representation of the graph
    print('Export complete. ONNX model saved to %s\nView with https://github.com/lutzroeder/netron' % f)
models/yolo.py CHANGED
@@ -48,21 +48,27 @@ class Model(nn.Module):
         if type(model_cfg) is dict:
             self.md = model_cfg  # model dict
         else:  # is *.yaml
+            import yaml  # for torch hub
             with open(model_cfg) as f:
                 self.md = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 
         # Define model
-        if nc:
+        if nc and nc != self.md['nc']:
+            print('Overriding %s nc=%g with nc=%g' % (model_cfg, self.md['nc'], nc))
             self.md['nc'] = nc  # override yaml value
         self.model, self.save = parse_model(self.md, ch=[ch])  # model, savelist, ch_out
         # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
 
         # Build strides, anchors
         m = self.model[-1]  # Detect()
-        …
+        if isinstance(m, Detect):
+            s = 128  # 2x min stride
+            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
+            m.anchors /= m.stride.view(-1, 1, 1)
+            check_anchor_order(m)
+            self.stride = m.stride
+            self._initialize_biases()  # only run once
+            # print('Strides: %s' % m.stride.tolist())
 
         # Init weights, biases
         torch_utils.initialize_weights(self)
@@ -136,17 +142,17 @@ class Model(nn.Module):
         # print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights
 
     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
-        print('Fusing layers...')
+        print('Fusing layers... ', end='')
         for m in self.model.modules():
             if type(m) is Conv:
                 m.conv = torch_utils.fuse_conv_and_bn(m.conv, m.bn)  # update conv
                 m.bn = None  # remove batchnorm
                 m.forward = m.fuseforward  # update forward
         torch_utils.model_info(self)
+        return self
 
 
 def parse_model(md, ch):  # model_dict, input_channels(3)
-    print('\n%3s%…
+    print('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
     anchors, nc, gd, gw = md['anchors'], md['nc'], md['depth_multiple'], md['width_multiple']
     na = (len(anchors[0]) // 2)  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
@@ -161,7 +167,7 @@ def parse_model(md, ch):  # model_dict, input_channels(3)
             pass
 
         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
-        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, …
+        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
             c1, c2 = ch[f], args[0]
 
             # Normal
@@ -182,7 +188,7 @@ def parse_model(md, ch):  # model_dict, input_channels(3)
             # c2 = make_divisible(c2, 8) if c2 != no else c2
 
             args = [c1, c2, *args[1:]]
-            if m …
+            if m in [BottleneckCSP, C3]:
                 args.insert(2, n)
                 n = 1
         elif m is nn.BatchNorm2d:
@@ -198,7 +204,7 @@ def parse_model(md, ch):  # model_dict, input_channels(3)
         t = str(m)[8:-2].replace('__main__.', '')  # module type
         np = sum([x.numel() for x in m_.parameters()])  # number params
         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
-        print('%3s%…
+        print('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         ch.append(c2)
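The stride computation added to Model.__init__ runs a dummy forward at s=128 and infers each detection layer's stride from its output grid size; anchors defined in pixels are then rescaled into grid units. A small numeric illustration of that step:

    # Numeric illustration of the stride computation above: a 128x128 dummy input
    # typically yields 16x16, 8x8 and 4x4 feature maps for the three Detect() inputs,
    # so the strides come out as 8, 16 and 32.
    s = 128
    feature_map_heights = [16, 8, 4]                 # x.shape[-2] for each detection layer
    strides = [s / h for h in feature_map_heights]   # [8.0, 16.0, 32.0]
    print(strides)
    # Anchors are then divided by these strides: m.anchors /= m.stride.view(-1, 1, 1)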
test.py CHANGED
@@ -26,6 +26,7 @@ def test(data,
     # Initialize/load model and set device
     if model is None:
         training = False
+        merge = opt.merge  # use Merge NMS
         device = torch_utils.select_device(opt.device, batch_size=batch_size)
 
         # Remove previous
@@ -34,10 +35,8 @@ def test(data,
 
         # Load model
         google_utils.attempt_download(weights)
-        model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
-
-        model.fuse()
-        model.to(device)
+        model = torch.load(weights, map_location=device)['model'].float().fuse().to(device)  # load to FP32
+        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
 
         # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -62,7 +61,6 @@ def test(data,
 
     # Dataloader
     if dataloader is None:  # not training
-        merge = opt.merge  # use Merge NMS
         img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
         _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
         path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
@@ -246,7 +244,6 @@ if __name__ == '__main__':
     parser.add_argument('--merge', action='store_true', help='use Merge NMS')
     parser.add_argument('--verbose', action='store_true', help='report mAP by class')
     opt = parser.parse_args()
-    opt.img_size = check_img_size(opt.img_size)
     opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
     opt.data = check_file(opt.data)  # check file
     print(opt)
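Both detect.py and test.py now validate the image size against the model's maximum stride instead of a fixed value, so the letterboxed input divides evenly into the coarsest grid. The sketch below mirrors the intent of that check (the real check_img_size lives in utils.utils; names here are illustrative):

    import math

    def make_divisible(x, divisor):
        # round x up to the nearest multiple of divisor
        return math.ceil(x / divisor) * divisor

    def check_img_size_sketch(img_size, s=32):
        new_size = make_divisible(img_size, int(s))
        if new_size != img_size:
            print('WARNING: --img-size %g must be a multiple of max stride %g, updating to %g' % (img_size, s, new_size))
        return new_size

    print(check_img_size_sketch(636, 32))  # 640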
train.py CHANGED
@@ -72,9 +72,7 @@ def train(hyp):
         os.remove(f)
 
     # Create model
-    model = Model(opt.cfg).to(device)
-    assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
-    model.names = data_dict['names']
+    model = Model(opt.cfg, nc=data_dict['nc']).to(device)
 
     # Image sizes
     gs = int(max(model.stride))  # grid size (max stride)
@@ -101,6 +99,9 @@ def train(hyp):
 
     optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
     optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
+    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
     print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
     del pg0, pg1, pg2
 
@@ -116,8 +117,9 @@ def train(hyp):
                      if model.state_dict()[k].shape == v.shape}  # to FP32, filter
             model.load_state_dict(ckpt['model'], strict=False)
         except KeyError as e:
-            s = "%s is not compatible with %s. …
-                % …
+            s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
+                "Please delete or update %s and try again, or use --weights '' to train from scratch." \
+                % (opt.weights, opt.cfg, opt.weights, opt.weights)
             raise KeyError(s) from e
 
         # load optimizer
@@ -130,16 +132,20 @@ def train(hyp):
             with open(results_file, 'w') as file:
                 file.write(ckpt['training_results'])  # write results.txt
 
+        # epochs
         start_epoch = ckpt['epoch'] + 1
+        if epochs < start_epoch:
+            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
+                  (opt.weights, ckpt['epoch'], epochs))
+            epochs += ckpt['epoch']  # finetune additional epochs
+
         del ckpt
 
     # Mixed precision training https://github.com/NVIDIA/apex
     if mixed_precision:
         model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
 
-    …
-    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
-    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
+
     scheduler.last_epoch = start_epoch - 1  # do not move
     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
     plot_lr_scheduler(optimizer, scheduler, epochs, save_dir = log_dir)
@@ -161,7 +167,7 @@ def train(hyp):
 
     # Testloader
     testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
-                                   …
+                                   hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
 
     # Model parameters
     hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
@@ -169,6 +175,7 @@ def train(hyp):
     model.hyp = hyp  # attach hyperparameters to model
     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
+    model.names = data_dict['names']
 
     #save hyperparamter and training options in run folder
     with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
@@ -216,6 +223,10 @@ def train(hyp):
             image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
             dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx
 
+        # Update mosaic border
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+
         mloss = torch.zeros(4, device=device)  # mean losses
         print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
         pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
@@ -323,7 +334,7 @@ def train(hyp):
             ckpt = {'epoch': epoch,
                     'best_fitness': best_fitness,
                     'training_results': f.read(),
-                    'model': ema.ema…
+                    'model': ema.ema,
                     'optimizer': None if final_epoch else optimizer.state_dict()}
 
             # Save last, best and delete
@@ -335,17 +346,17 @@ def train(hyp):
         # end epoch ----------------------------------------------------------------------------------------------------
     # end training
 
-    if len(…
-    …
+    # Strip optimizers
+    n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
+    fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
+    for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
+        if os.path.exists(f1):
+            os.rename(f1, f2)  # rename
+            ispt = f2.endswith('.pt')  # is *.pt
+            strip_optimizer(f2) if ispt else None  # strip optimizer
+            os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
+
+    # Finish
     if not opt.evolve:
         plot_results(save_dir = log_dir)  # save as results.png
         print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
@@ -364,6 +375,7 @@ if __name__ == '__main__':
     parser.add_argument('--batch-size', type=int, default=16)
     parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
@@ -378,6 +390,7 @@ if __name__ == '__main__':
 
     opt = parser.parse_args()
 
+    opt.weights = last if opt.resume and not opt.weights else opt.weights
     opt.cfg = check_file(opt.cfg)  # check file
     opt.data = check_file(opt.data)  # check file
     opt.hyp = check_file(opt.hyp) if opt.hyp else ''  #check file
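The cosine scheduler, now created right after the optimizer groups, scales the base learning rate by a factor that starts at 1.0 and decays to 0.1 at the final epoch. A quick check of the lambda's endpoints:

    import math

    # Quick check of the cosine LR lambda used above.
    epochs = 300
    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1

    print(lf(0))            # 1.0 at the first epoch
    print(lf(epochs // 2))  # 0.55 at mid-training
    print(lf(epochs))       # 0.1 at the end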
utils/datasets.py
CHANGED
@@ -62,7 +62,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
|
|
62 |
|
63 |
|
64 |
class LoadImages: # for inference
|
65 |
-
def __init__(self, path, img_size=
|
66 |
path = str(Path(path)) # os-agnostic
|
67 |
files = []
|
68 |
if os.path.isdir(path):
|
@@ -139,7 +139,7 @@ class LoadImages: # for inference
|
|
139 |
|
140 |
|
141 |
class LoadWebcam: # for inference
|
142 |
-
def __init__(self, pipe=0, img_size=
|
143 |
self.img_size = img_size
|
144 |
|
145 |
if pipe == '0':
|
@@ -204,7 +204,7 @@ class LoadWebcam: # for inference
|
|
204 |
|
205 |
|
206 |
class LoadStreams: # multiple IP or RTSP cameras
|
207 |
-
def __init__(self, sources='streams.txt', img_size=
|
208 |
self.mode = 'images'
|
209 |
self.img_size = img_size
|
210 |
|
@@ -277,7 +277,7 @@ class LoadStreams: # multiple IP or RTSP cameras
|
|
277 |
|
278 |
|
279 |
class LoadImagesAndLabels(Dataset): # for training/testing
|
280 |
-
def __init__(self, path, img_size=
|
281 |
cache_images=False, single_cls=False, stride=32, pad=0.0):
|
282 |
try:
|
283 |
path = str(Path(path)) # os-agnostic
|
@@ -307,6 +307,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
|
307 |
self.image_weights = image_weights
|
308 |
self.rect = False if image_weights else rect
|
309 |
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
|
|
|
|
|
310 |
|
311 |
# Define labels
|
312 |
self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
|
@@ -585,7 +587,7 @@ def load_mosaic(self, index):
|
|
585 |
|
586 |
labels4 = []
|
587 |
s = self.img_size
|
588 |
-
|
589 |
indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
|
590 |
for i, index in enumerate(indices):
|
591 |
# Load image
|
@@ -626,6 +628,9 @@ def load_mosaic(self, index):
|
|
626 |
# np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:]) # use with center crop
|
627 |
np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_affine
|
628 |
|
|
|
|
|
|
|
629 |
# Augment
|
630 |
# img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)] # center crop (WARNING, requires box pruning)
|
631 |
img4, labels4 = random_affine(img4, labels4,
|
@@ -633,12 +638,29 @@ def load_mosaic(self, index):
|
|
633 |
translate=self.hyp['translate'],
|
634 |
scale=self.hyp['scale'],
|
635 |
shear=self.hyp['shear'],
|
636 |
-
border
|
637 |
|
638 |
return img4, labels4
|
639 |
|
640 |
|
641 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
642 |
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
|
643 |
shape = img.shape[:2] # current shape [height, width]
|
644 |
if isinstance(new_shape, int):
|
@@ -671,13 +693,13 @@ def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scale
|
|
671 |
return img, ratio, (dw, dh)
|
672 |
|
673 |
|
674 |
-
def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=0):
|
675 |
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
|
676 |
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
|
677 |
# targets = [cls, xyxy]
|
678 |
|
679 |
-
height = img.shape[0] + border * 2
|
680 |
-
width = img.shape[1] + border * 2
|
681 |
|
682 |
# Rotation and Scale
|
683 |
R = np.eye(3)
|
@@ -689,8 +711,8 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
|
|
689 |
|
690 |
# Translation
|
691 |
T = np.eye(3)
|
692 |
-
T[0, 2] = random.uniform(-translate, translate) * img.shape[
|
693 |
-
T[1, 2] = random.uniform(-translate, translate) * img.shape[
|
694 |
|
695 |
# Shear
|
696 |
S = np.eye(3)
|
@@ -699,7 +721,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
|
|
699 |
|
700 |
# Combined rotation matrix
|
701 |
M = S @ T @ R # ORDER IS IMPORTANT HERE!!
|
702 |
-
if (border != 0) or (M != np.eye(3)).any(): # image changed
|
703 |
img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
|
704 |
|
705 |
# Transform label coordinates
|
@@ -762,6 +784,7 @@ def cutout(image, labels):
|
|
762 |
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
|
763 |
|
764 |
# Intersection over box2 area
|
|
|
765 |
return inter_area / box2_area
|
766 |
|
767 |
# create random masks
|
|
|
 62
 63
 64   class LoadImages:  # for inference
 65 +     def __init__(self, path, img_size=640):
 66           path = str(Path(path))  # os-agnostic
 67           files = []
 68           if os.path.isdir(path):

139
140
141   class LoadWebcam:  # for inference
142 +     def __init__(self, pipe=0, img_size=640):
143           self.img_size = img_size
144
145           if pipe == '0':

204
205
206   class LoadStreams:  # multiple IP or RTSP cameras
207 +     def __init__(self, sources='streams.txt', img_size=640):
208           self.mode = 'images'
209           self.img_size = img_size
210

277
278
279   class LoadImagesAndLabels(Dataset):  # for training/testing
280 +     def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
281                    cache_images=False, single_cls=False, stride=32, pad=0.0):
282           try:
283               path = str(Path(path))  # os-agnostic

307           self.image_weights = image_weights
308           self.rect = False if image_weights else rect
309           self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
310 +         self.mosaic_border = [-img_size // 2, -img_size // 2]
311 +         self.stride = stride
312
313           # Define labels
314           self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')

587
588       labels4 = []
589       s = self.img_size
590 +     yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
591       indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
592       for i, index in enumerate(indices):
593           # Load image

628       # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
629       np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine
630
631 +     # Replicate
632 +     # img4, labels4 = replicate(img4, labels4)
633 +
634       # Augment
635       # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
636       img4, labels4 = random_affine(img4, labels4,
638                                     translate=self.hyp['translate'],
639                                     scale=self.hyp['scale'],
640                                     shear=self.hyp['shear'],
641 +                                   border=self.mosaic_border)  # border to remove
642
643       return img4, labels4
644
645
646 + def replicate(img, labels):
647 +     # Replicate labels
648 +     h, w = img.shape[:2]
649 +     boxes = labels[:, 1:].astype(int)
650 +     x1, y1, x2, y2 = boxes.T
651 +     s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
652 +     for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
653 +         x1b, y1b, x2b, y2b = boxes[i]
654 +         bh, bw = y2b - y1b, x2b - x1b
655 +         yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
656 +         x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
657 +         img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
658 +         labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
659 +
660 +     return img, labels
661 +
662 +
663 + def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
664       # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
665       shape = img.shape[:2]  # current shape [height, width]
666       if isinstance(new_shape, int):

693       return img, ratio, (dw, dh)
694
695
696 + def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
697       # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
698       # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
699       # targets = [cls, xyxy]
700
701 +     height = img.shape[0] + border[0] * 2  # shape(h,w,c)
702 +     width = img.shape[1] + border[1] * 2
703
704       # Rotation and Scale
705       R = np.eye(3)

711
712       # Translation
713       T = np.eye(3)
714 +     T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
715 +     T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)
716
717       # Shear
718       S = np.eye(3)

721
722       # Combined rotation matrix
723       M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
724 +     if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
725           img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))
726
727       # Transform label coordinates

784           box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
785
786           # Intersection over box2 area
787 +
788           return inter_area / box2_area
789
790       # create random masks

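Note on the datasets.py change above: `random_affine` now takes a per-axis `border` tuple instead of a scalar, and the mosaic loader passes `self.mosaic_border = [-img_size // 2, -img_size // 2]` so the 2x-size mosaic canvas is warped back down to the training resolution. A minimal sketch of that arithmetic (illustrative only, not part of the commit; the demo function name is made up):

    import numpy as np
    import cv2

    def mosaic_border_demo(img_size=640):
        # the mosaic canvas is 2 * img_size per side; the border is minus half the train size
        border = [-img_size // 2, -img_size // 2]                       # e.g. [-320, -320]
        mosaic = np.full((img_size * 2, img_size * 2, 3), 114, np.uint8)
        # random_affine computes its output size as shape + 2 * border per axis
        height = mosaic.shape[0] + border[0] * 2                        # 1280 - 640 = 640
        width = mosaic.shape[1] + border[1] * 2
        out = cv2.warpAffine(mosaic, np.eye(3)[:2], dsize=(width, height), borderValue=(114, 114, 114))
        return out.shape                                                # (640, 640, 3)

    print(mosaic_border_demo())
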
utils/torch_utils.py
CHANGED
@@ -54,6 +54,11 @@ def time_synchronized():
 54       return time.time()
 55
 56
 57   def initialize_weights(model):
 58       for m in model.modules():
 59           t = type(m)

@@ -71,16 +76,36 @@ def find_modules(model, mclass=nn.Conv2d):
 71       return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
 72
 73
 74   def fuse_conv_and_bn(conv, bn):
 75       # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
 76       with torch.no_grad():
 77           # init
 78 -         fusedconv = torch.nn.Conv2d(conv.in_channels,
 79 -                                     conv.out_channels,
 80 -                                     kernel_size=conv.kernel_size,
 81 -                                     stride=conv.stride,
 82 -                                     padding=conv.padding,
 83 -                                     bias=True)
 84
 85           # prepare filters
 86           w_conv = conv.weight.clone().view(conv.out_channels, -1)

@@ -88,10 +113,7 @@ def fuse_conv_and_bn(conv, bn):
 88           fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
 89
 90           # prepare spatial bias
 91 -         if conv.bias is not None:
 92 -             b_conv = conv.bias
 93 -         else:
 94 -             b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device)
 95           b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
 96           fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
 97

@@ -111,8 +133,8 @@ def model_info(model, verbose=False):
111
112       try:  # FLOPS
113           from thop import profile
114 -
115 -         fs = ', %.1f GFLOPS' % (
116       except:
117           fs = ''
118

@@ -134,8 +156,8 @@ def load_classifier(name='resnet101', n=2):
134
135       # Reshape output to n classes
136       filters = model.fc.weight.shape[1]
137 -     model.fc.bias =
138 -     model.fc.weight =
139       model.fc.out_features = n
140       return model
141

@@ -170,33 +192,31 @@ class ModelEMA:
170       """
171
172       def __init__(self, model, decay=0.9999, device=''):
173 -         # make a copy of the model for accumulating moving average of weights
174 -         self.ema = deepcopy(model)
175           self.ema.eval()
176           self.updates = 0  # number of EMA updates
177           self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
178           self.device = device  # perform ema on different device from model if set
179           if device:
180 -             self.ema.to(device=device)
181           for p in self.ema.parameters():
182               p.requires_grad_(False)
183
184       def update(self, model):
185 -         self.updates += 1
186 -         d = self.decay(self.updates)
187           with torch.no_grad():
188 -             if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
189 -                 msd, esd = model.module.state_dict(), self.ema.module.state_dict()
190 -             else:
191 -                 msd, esd = model.state_dict(), self.ema.state_dict()
192
193 -             for k, v in esd.items():
194               if v.dtype.is_floating_point:
195                   v *= d
196                   v += (1. - d) * msd[k].detach()
197
198       def update_attr(self, model):
199 -         # Assign attributes (which may change during training)
200 -         for k in model.__dict__.keys():
201 -             if not k.startswith('_'):
202 -                 setattr(self.ema, k, getattr(model, k))

 54       return time.time()
 55
 56
 57 + def is_parallel(model):
 58 +     # is model is parallel with DP or DDP
 59 +     return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
 60 +
 61 +
 62   def initialize_weights(model):
 63       for m in model.modules():
 64           t = type(m)

 76       return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
 77
 78
 79 + def sparsity(model):
 80 +     # Return global model sparsity
 81 +     a, b = 0., 0.
 82 +     for p in model.parameters():
 83 +         a += p.numel()
 84 +         b += (p == 0).sum()
 85 +     return b / a
 86 +
 87 +
 88 + def prune(model, amount=0.3):
 89 +     # Prune model to requested global sparsity
 90 +     import torch.nn.utils.prune as prune
 91 +     print('Pruning model... ', end='')
 92 +     for name, m in model.named_modules():
 93 +         if isinstance(m, nn.Conv2d):
 94 +             prune.l1_unstructured(m, name='weight', amount=amount)  # prune
 95 +             prune.remove(m, 'weight')  # make permanent
 96 +     print(' %.3g global sparsity' % sparsity(model))
 97 +
 98 +
 99   def fuse_conv_and_bn(conv, bn):
100       # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
101       with torch.no_grad():
102           # init
103 +         fusedconv = nn.Conv2d(conv.in_channels,
104 +                               conv.out_channels,
105 +                               kernel_size=conv.kernel_size,
106 +                               stride=conv.stride,
107 +                               padding=conv.padding,
108 +                               bias=True).to(conv.weight.device)
109
110           # prepare filters
111           w_conv = conv.weight.clone().view(conv.out_channels, -1)

113           fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
114
115           # prepare spatial bias
116 +         b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
117           b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
118           fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
119

133
134       try:  # FLOPS
135           from thop import profile
136 +         flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2
137 +         fs = ', %.1f GFLOPS' % (flops * 100)  # 640x640 FLOPS
138       except:
139           fs = ''
140

156
157       # Reshape output to n classes
158       filters = model.fc.weight.shape[1]
159 +     model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
160 +     model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
161       model.fc.out_features = n
162       return model
163

192       """
193
194       def __init__(self, model, decay=0.9999, device=''):
195 +         # Create EMA
196 +         self.ema = deepcopy(model.module if is_parallel(model) else model)  # FP32 EMA
197           self.ema.eval()
198           self.updates = 0  # number of EMA updates
199           self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
200           self.device = device  # perform ema on different device from model if set
201           if device:
202 +             self.ema.to(device)
203           for p in self.ema.parameters():
204               p.requires_grad_(False)
205
206       def update(self, model):
207 +         # Update EMA parameters
208           with torch.no_grad():
209 +             self.updates += 1
210 +             d = self.decay(self.updates)
211
212 +             msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
213 +             for k, v in self.ema.state_dict().items():
214                   if v.dtype.is_floating_point:
215                       v *= d
216                       v += (1. - d) * msd[k].detach()
217
218       def update_attr(self, model):
219 +         # Update EMA attributes
220 +         for k, v in model.__dict__.items():
221 +             if not k.startswith('_') and k not in ["process_group", "reducer"]:
222 +                 setattr(self.ema, k, v)

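For quick reference, a hedged usage sketch of the helpers this diff adds to utils/torch_utils.py (`is_parallel`, `sparsity`, `prune`) together with the reworked `ModelEMA`. The toy model below is illustrative, and it assumes the repo root is on PYTHONPATH:

    import torch.nn as nn
    from utils.torch_utils import is_parallel, sparsity, prune, ModelEMA

    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU())

    print(is_parallel(model))      # False: plain nn.Module, not wrapped in DP/DDP
    prune(model, amount=0.3)       # L1-prunes 30% of each Conv2d's weights, then prints global sparsity
    print(float(sparsity(model)))  # fraction of zeroed parameters across the whole model

    ema = ModelEMA(model)          # EMA copy built from model (or model.module if DP/DDP)
    ema.update(model)              # blend the EMA state_dict toward the current model weights
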
utils/utils.py
CHANGED
@@ -53,7 +53,7 @@ def check_git_status():
 53
 54   def check_img_size(img_size, s=32):
 55       # Verify img_size is a multiple of stride s
 56 -     new_size = make_divisible(img_size, s)  # ceil gs-multiple
 57       if new_size != img_size:
 58           print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
 59       return new_size

@@ -443,7 +443,9 @@ def compute_loss(p, targets, model):  # predictions, targets, model
443       BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
444
445       # per output
446 -     nt = 0  # targets
447       for i, pi in enumerate(p):  # layer index, layer predictions
448           b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
449           tobj = torch.zeros_like(pi[..., 0])  # target obj

@@ -473,11 +475,12 @@ def compute_loss(p, targets, model):  # predictions, targets, model
473           # with open('targets.txt', 'a') as file:
474           #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
475
476 -         lobj += BCEobj(pi[..., 4], tobj)  # obj loss
477
478 -     lbox *= h['giou']
479 -     lobj *= h['obj']
480 -     lcls *= h['cls']
481       bs = tobj.shape[0]  # batch size
482       if red == 'sum':
483           g = 3.0  # loss gain

@@ -514,16 +517,14 @@ def build_targets(p, targets, model):
514           a, t = at[j], t.repeat(na, 1, 1)[j]  # filter
515
516           # overlaps
517           gxy = t[:, 2:4]  # grid xy
518           z = torch.zeros_like(gxy)
519           if style == 'rect2':
520 -             g = 0.2  # offset
521               j, k = ((gxy % 1. < g) & (gxy > 1.)).T
522               a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
523               offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
524 -
525           elif style == 'rect4':
526 -             g = 0.5  # offset
527               j, k = ((gxy % 1. < g) & (gxy > 1.)).T
528               l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
529               a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)

@@ -770,11 +771,11 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
770       wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh
771
772       # Filter
773 -     i = (wh0 <
774       if i:
775           print('WARNING: Extremely small objects found. '
776 -               '%g of %g labels are <
777 -     wh = wh0[(wh0 >=
778
779       # Kmeans calculation
780       from scipy.cluster.vq import kmeans

 53
 54   def check_img_size(img_size, s=32):
 55       # Verify img_size is a multiple of stride s
 56 +     new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
 57       if new_size != img_size:
 58           print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
 59       return new_size

443       BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
444
445       # per output
446 +     nt = 0  # number of targets
447 +     np = len(p)  # number of outputs
448 +     balance = [1.0, 1.0, 1.0]
449       for i, pi in enumerate(p):  # layer index, layer predictions
450           b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
451           tobj = torch.zeros_like(pi[..., 0])  # target obj

475           # with open('targets.txt', 'a') as file:
476           #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
477
478 +         lobj += BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss
479
480 +     s = 3 / np  # output count scaling
481 +     lbox *= h['giou'] * s
482 +     lobj *= h['obj'] * s
483 +     lcls *= h['cls'] * s
484       bs = tobj.shape[0]  # batch size
485       if red == 'sum':
486           g = 3.0  # loss gain

517           a, t = at[j], t.repeat(na, 1, 1)[j]  # filter
518
519           # overlaps
520 +         g = 0.5  # offset
521           gxy = t[:, 2:4]  # grid xy
522           z = torch.zeros_like(gxy)
523           if style == 'rect2':
524               j, k = ((gxy % 1. < g) & (gxy > 1.)).T
525               a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
526               offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
527           elif style == 'rect4':
528               j, k = ((gxy % 1. < g) & (gxy > 1.)).T
529               l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
530               a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)

771       wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh
772
773       # Filter
774 +     i = (wh0 < 3.0).any(1).sum()
775       if i:
776           print('WARNING: Extremely small objects found. '
777 +               '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
778 +     wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
779
780       # Kmeans calculation
781       from scipy.cluster.vq import kmeans

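The compute_loss hunk above adds a per-output `balance` weight on the objectness term (all ones in this commit, so it is a hook rather than a behaviour change) and rescales the giou/obj/cls gains by `3 / np`, where `np` here is the number of output layers, so hyperparameters tuned for the usual 3-layer head keep the same effective magnitude if the head count changes. A quick sketch of that scaling with placeholder gain values (not the repo's hyp settings):

    def scaled_gains(num_outputs, giou=0.05, obj=1.0, cls=0.5):
        # 'output count scaling' from the diff: neutral for 3 outputs, shrinks gains for more
        s = 3 / num_outputs
        return giou * s, obj * s, cls * s

    print(scaled_gains(3))  # (0.05, 1.0, 0.5) -> unchanged for a standard 3-output head
    print(scaled_gains(4))  # -> roughly (0.0375, 0.75, 0.375)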