Improved `detect.py` timing (#4741)
* Improved detect.py timing
* Eliminate 1 time_sync() call
* Inference-only time
* dash
* Save section
* Cleanup
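Taken together, the commits replace detect.py's single `t0`/`time.time()` wall-clock with per-stage accumulators: `dt` collects the seconds spent in pre-processing, inference, and NMS, each stage bounded by `time_sync()` timestamps, and `seen` counts processed images so the totals can be averaged into per-image speeds. A minimal sketch of the pattern, with the stage bodies stubbed out and `time_sync()` re-implemented here as a stand-in for YOLOv5's `utils.torch_utils.time_sync`:

    import time

    import torch

    def time_sync():
        # CUDA kernels run asynchronously; synchronize before reading the
        # clock so each stage is charged its full wall-clock time
        # (stand-in for YOLOv5's utils.torch_utils.time_sync)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return time.time()

    dt, seen = [0.0, 0.0, 0.0], 0  # per-stage seconds, images processed
    for _ in range(10):  # stand-in for the dataset loop
        t1 = time_sync()
        # ... pre-process image ...
        t2 = time_sync()
        dt[0] += t2 - t1  # pre-process time
        # ... run inference ...
        t3 = time_sync()
        dt[1] += t3 - t2  # inference time
        # ... non-max suppression ...
        dt[2] += time_sync() - t3  # NMS time
        seen += 1

    t = tuple(x / seen * 1E3 for x in dt)  # average milliseconds per image
    print('Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image' % t)

Each timestamp closes one stage and opens the next, so no stage needs a separate start reading; reusing `t2` as the inference start is what eliminates the extra `time_sync()` call mentioned in the commit list.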
detect.py
CHANGED
@@ -8,7 +8,6 @@ Usage:
 
 import argparse
 import sys
-import time
 from pathlib import Path
 
 import cv2
@@ -123,8 +122,9 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
     # Run inference
     if pt and device.type != 'cpu':
         model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
-    t0 = time.time()
+    dt, seen = [0.0, 0.0, 0.0], 0
     for path, img, im0s, vid_cap in dataset:
+        t1 = time_sync()
         if onnx:
             img = img.astype('float32')
         else:
@@ -133,9 +133,10 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
         img = img / 255.0  # 0 - 255 to 0.0 - 1.0
         if len(img.shape) == 3:
             img = img[None]  # expand for batch dim
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Inference
-        t1 = time_sync()
         if pt:
             visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
             pred = model(img, augment=augment, visualize=visualize)[0]
@@ -162,17 +163,20 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
             pred[..., 2] *= imgsz[1]  # w
             pred[..., 3] *= imgsz[0]  # h
             pred = torch.tensor(pred)
+        t3 = time_sync()
+        dt[1] += t3 - t2
 
         # NMS
         pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-        t2 = time_sync()
+        dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
         if classify:
             pred = apply_classifier(pred, modelc, img, im0s)
 
         # Process predictions
-        for i, det in enumerate(pred):  # detections per image
+        for i, det in enumerate(pred):  # per image
+            seen += 1
             if webcam:  # batch_size >= 1
                 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
             else:
@@ -209,8 +213,8 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
                         if save_crop:
                             save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
 
-            # Print time (inference + NMS)
-            print(f'{s}Done. ({t2 - t1:.3f}s)')
+            # Print time (inference-only)
+            print(f'{s}Done. ({t3 - t2:.3f}s)')
 
             # Stream results
             im0 = annotator.result()
@@ -237,15 +241,15 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
                         vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                     vid_writer[i].write(im0)
 
+    # Print results
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
+    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
     if save_txt or save_img:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
         print(f"Results saved to {colorstr('bold', save_dir)}{s}")
-
     if update:
         strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
 
-    print(f'Done. ({time.time() - t0:.3f}s)')
-
 
 def parse_opt():
     parser = argparse.ArgumentParser()
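With the accumulators in place, detect.py's closing report averages each stage over every image seen rather than printing one end-to-end total. A worked example of the arithmetic with made-up totals (the values below are illustrative, not measurements):

    dt, seen = [0.03, 0.90, 0.06], 60  # hypothetical: accumulated seconds over 60 images
    t = tuple(x / seen * 1E3 for x in dt)  # seconds -> milliseconds per image
    print('Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image' % t)
    # -> Speed: 0.5ms pre-process, 15.0ms inference, 1.0ms NMS per image

val.py below adopts the same scheme, folding its old scalar time accumulators into the same three-element `dt` list.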
val.py
CHANGED
@@ -154,22 +154,22 @@ def run(data,
     names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
     class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
     s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
-    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
+    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
-        t_ = time_sync()
+        t1 = time_sync()
         img = img.to(device, non_blocking=True)
         img = img.half() if half else img.float()  # uint8 to fp16/32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
         targets = targets.to(device)
         nb, _, height, width = img.shape  # batch size, channels, height, width
-        t = time_sync()
-        t0 += t - t_
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Run model
         out, train_out = model(img, augment=augment)  # inference and training outputs
-        t1 += time_sync() - t
+        dt[1] += time_sync() - t2
 
         # Compute loss
         if compute_loss:
@@ -178,9 +178,9 @@ def run(data,
         # Run NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
-        t = time_sync()
+        t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
-        t2 += time_sync() - t
+        dt[2] += time_sync() - t3
 
         # Statistics per image
         for si, pred in enumerate(out):
@@ -247,7 +247,7 @@ def run(data,
             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
 
     # Print speeds
-    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
     if not training:
         shape = (batch_size, 3, imgsz, imgsz)
         print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
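One nuance in val.py: `dt` is accumulated once per batch inside the dataloader loop, while `seen` counts individual images, so dividing by `seen` still yields per-image speeds. A quick sanity check with hypothetical numbers:

    n_batches, batch_size = 20, 32  # hypothetical run
    seen = n_batches * batch_size  # 640 images
    dt1 = 4.8  # hypothetical inference seconds, accumulated once per batch
    print(f'{dt1 / seen * 1E3:.1f}ms inference per image')  # -> 7.5ms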