glenn-jocher committed
Commit 7af1b4c · unverified · 1 Parent(s): c5360f6

Improved `detect.py` timing (#4741)


* Improved detect.py timing

* Eliminate 1 time_sync() call

* Inference-only time

* dash

* #Save section

* Cleanup
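
This commit replaces the old end-to-end `t0 = time.time()` stopwatch in `detect.py` with three per-stage accumulators (`dt`) bounded by `time_sync()` calls, so pre-process, inference, and NMS time are reported separately per image, matching the `val.py` style. `time_sync()` is YOLOv5's CUDA-aware clock from `utils/torch_utils.py`; a minimal sketch of its behavior for context (reproduced from memory, not part of this diff):

```python
import time

import torch


def time_sync():
    # Wait for all queued CUDA kernels to finish before reading the wall
    # clock; otherwise asynchronous GPU work would be mis-attributed to
    # whichever later stage happens to block first.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
```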

Files changed (2)
  1. detect.py +14 -10
  2. val.py +8 -8
detect.py CHANGED
```diff
@@ -8,7 +8,6 @@ Usage:
 
 import argparse
 import sys
-import time
 from pathlib import Path
 
 import cv2
@@ -123,8 +122,9 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
     # Run inference
     if pt and device.type != 'cpu':
         model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
-    t0 = time.time()
+    dt, seen = [0.0, 0.0, 0.0], 0
     for path, img, im0s, vid_cap in dataset:
+        t1 = time_sync()
         if onnx:
             img = img.astype('float32')
         else:
@@ -133,9 +133,10 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
         img = img / 255.0  # 0 - 255 to 0.0 - 1.0
         if len(img.shape) == 3:
             img = img[None]  # expand for batch dim
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Inference
-        t1 = time_sync()
         if pt:
             visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
             pred = model(img, augment=augment, visualize=visualize)[0]
@@ -162,17 +163,20 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
             pred[..., 2] *= imgsz[1]  # w
             pred[..., 3] *= imgsz[0]  # h
             pred = torch.tensor(pred)
+        t3 = time_sync()
+        dt[1] += t3 - t2
 
         # NMS
         pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
-        t2 = time_sync()
+        dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
         if classify:
             pred = apply_classifier(pred, modelc, img, im0s)
 
         # Process predictions
-        for i, det in enumerate(pred):  # detections per image
+        for i, det in enumerate(pred):  # per image
+            seen += 1
             if webcam:  # batch_size >= 1
                 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
             else:
@@ -209,8 +213,8 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
                     if save_crop:
                         save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
 
-            # Print time (inference + NMS)
-            print(f'{s}Done. ({t2 - t1:.3f}s)')
+            # Print time (inference-only)
+            print(f'{s}Done. ({t3 - t2:.3f}s)')
 
         # Stream results
         im0 = annotator.result()
@@ -237,15 +241,15 @@ def run(weights='yolov5s.pt',  # model.pt path(s)
                     vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                 vid_writer[i].write(im0)
 
+    # Print results
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
+    print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
     if save_txt or save_img:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
         print(f"Results saved to {colorstr('bold', save_dir)}{s}")
-
     if update:
         strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
 
-    print(f'Done. ({time.time() - t0:.3f}s)')
-
 
 def parse_opt():
     parser = argparse.ArgumentParser()
```
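
Taken together, the `detect.py` hunks implement the timing pattern below. This standalone sketch substitutes hypothetical `preprocess`/`infer`/`nms` stand-ins and dummy data for the real pipeline stages, but the `dt`/`seen` bookkeeping mirrors the diff:

```python
import time

import torch


def time_sync():
    # CUDA-aware clock (assumption: mirrors utils/torch_utils.py)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


# Hypothetical stand-ins for the real detect.py stages
def preprocess(x):
    return x.float() / 255.0  # uint8 -> float, 0-255 to 0.0-1.0


def infer(x):
    return x.mean()  # placeholder for model forward pass


def nms(p):
    return p  # placeholder for non_max_suppression


dataset = [torch.randint(0, 255, (1, 3, 640, 640)) for _ in range(8)]  # dummy batch-of-1 images

dt, seen = [0.0, 0.0, 0.0], 0  # cumulative seconds: pre-process, inference, NMS
for img in dataset:
    t1 = time_sync()
    img = preprocess(img)
    t2 = time_sync()
    dt[0] += t2 - t1  # charge pre-processing to bucket 0
    pred = infer(img)
    t3 = time_sync()
    dt[1] += t3 - t2  # charge inference to bucket 1
    pred = nms(pred)
    dt[2] += time_sync() - t3  # charge NMS to bucket 2
    seen += 1

t = tuple(x / seen * 1E3 for x in dt)  # average milliseconds per image
print('Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image' % t)
```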
val.py CHANGED
```diff
@@ -154,22 +154,22 @@ def run(data,
     names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
     class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
     s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
-    p, r, f1, mp, mr, map50, map, t0, t1, t2 = 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
+    dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
-        t_ = time_sync()
+        t1 = time_sync()
         img = img.to(device, non_blocking=True)
         img = img.half() if half else img.float()  # uint8 to fp16/32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
         targets = targets.to(device)
         nb, _, height, width = img.shape  # batch size, channels, height, width
-        t = time_sync()
-        t0 += t - t_
+        t2 = time_sync()
+        dt[0] += t2 - t1
 
         # Run model
         out, train_out = model(img, augment=augment)  # inference and training outputs
-        t1 += time_sync() - t
+        dt[1] += time_sync() - t2
 
         # Compute loss
         if compute_loss:
@@ -178,9 +178,9 @@ def run(data,
         # Run NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
-        t = time_sync()
+        t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
-        t2 += time_sync() - t
+        dt[2] += time_sync() - t3
 
         # Statistics per image
         for si, pred in enumerate(out):
@@ -247,7 +247,7 @@ def run(data,
             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
 
     # Print speeds
-    t = tuple(x / seen * 1E3 for x in (t0, t1, t2))  # speeds per image
+    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
     if not training:
         shape = (batch_size, 3, imgsz, imgsz)
         print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)
```
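
For a concrete feel of the reporting math shared by both files (hypothetical totals, not measured numbers): with stage totals `dt = [0.12, 0.64, 0.09]` seconds accumulated over `seen = 100` images, the per-image speeds come out as follows:

```python
# Hypothetical accumulated stage totals over a 100-image run
dt, seen = [0.12, 0.64, 0.09], 100  # seconds: pre-process, inference, NMS
t = tuple(x / seen * 1E3 for x in dt)  # -> (1.2, 6.4, 0.9) ms per image
print('Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image' % t)
# -> Speed: 1.2ms pre-process, 6.4ms inference, 0.9ms NMS per image
```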