glenn-jocher committed 7bf04d9 (unverified) · 1 parent: d885799

`AutoShape()` models as `DetectMultiBackend()` instances (#5845)


* Update AutoShape()

* autodownload ONNX

* Cleanup

* Finish updates

* Add Usage

* fix device

* Update hubconf.py

* Update common.py

* smart param selection

* autodownload all formats

* autopad only pytorch models

* new_shape edits

* stride tensor fix

* Cleanup

Files changed (4):
  1. export.py +1 -1
  2. hubconf.py +7 -7
  3. models/common.py +24 -16
  4. utils/general.py +3 -1
export.py CHANGED
@@ -411,7 +411,7 @@ def parse_opt():
     parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
     parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
     parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
-    parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
+    parser.add_argument('--opset', type=int, default=14, help='ONNX: opset version')
     parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
     parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
     parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
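
The only change here bumps the default ONNX opset from 13 to 14. A minimal sketch of an export picking up the new default, assuming the `run()` entry point and keyword names in this repo's export.py:

```python
# Sketch: export yolov5s.pt to ONNX with the new default opset.
# Equivalent CLI: python export.py --weights yolov5s.pt --include onnx
import export  # this repo's export.py

export.run(weights='yolov5s.pt', include=('onnx',))  # --opset now defaults to 14
```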
hubconf.py CHANGED
@@ -5,6 +5,7 @@ PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5/
 Usage:
     import torch
     model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
+    model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx')  # file from branch
 """
 
 import torch
@@ -27,26 +28,25 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
     """
     from pathlib import Path
 
-    from models.common import AutoShape
-    from models.experimental import attempt_load
+    from models.common import AutoShape, DetectMultiBackend
     from models.yolo import Model
     from utils.downloads import attempt_download
     from utils.general import check_requirements, intersect_dicts, set_logging
     from utils.torch_utils import select_device
 
-    file = Path(__file__).resolve()
     check_requirements(exclude=('tensorboard', 'thop', 'opencv-python'))
     set_logging(verbose=verbose)
 
-    save_dir = Path('') if str(name).endswith('.pt') else file.parent
-    path = (save_dir / name).with_suffix('.pt')  # checkpoint path
+    name = Path(name)
+    path = name.with_suffix('.pt') if name.suffix == '' else name  # checkpoint path
     try:
         device = select_device(('0' if torch.cuda.is_available() else 'cpu') if device is None else device)
 
         if pretrained and channels == 3 and classes == 80:
-            model = attempt_load(path, map_location=device)  # download/load FP32 model
+            model = DetectMultiBackend(path, device=device)  # download/load FP32 model
+            # model = models.experimental.attempt_load(path, map_location=device)  # download/load FP32 model
         else:
-            cfg = list((Path(__file__).parent / 'models').rglob(f'{name}.yaml'))[0]  # model.yaml path
+            cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
             model = Model(cfg, channels, classes)  # create model
             if pretrained:
                 ckpt = torch.load(attempt_download(path), map_location=device)  # load
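
A short usage sketch of what this change enables: `torch.hub.load()` now returns models wrapped as `DetectMultiBackend()` instances, so the `'custom'` entry point accepts exported formats such as ONNX (the weights path below is illustrative):

```python
import torch

# pretrained PyTorch weights, now loaded through DetectMultiBackend()
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# custom exported weights, e.g. ONNX (illustrative path)
# model = torch.hub.load('ultralytics/yolov5:master', 'custom', 'path/to/yolov5s.onnx')

results = model('https://ultralytics.com/images/zidane.jpg')  # AutoShape handles preprocessing + NMS
results.print()
```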
models/common.py CHANGED
@@ -276,7 +276,7 @@ class Concat(nn.Module):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for python inference on various backends
-    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=False):
         # Usage:
         #   PyTorch:      weights = *.pt
         #   TorchScript:            *.torchscript
@@ -287,6 +287,8 @@ class DetectMultiBackend(nn.Module):
         #   ONNX Runtime:           *.onnx
         #   OpenCV DNN:             *.onnx with dnn=True
         #   TensorRT:               *.engine
+        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import
+
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix = Path(w).suffix.lower()
@@ -294,6 +296,7 @@ class DetectMultiBackend(nn.Module):
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, jit, onnx, engine, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+        attempt_download(w)  # download if not local
 
         if jit:  # TorchScript
             LOGGER.info(f'Loading {w} for TorchScript inference...')
@@ -303,11 +306,12 @@ class DetectMultiBackend(nn.Module):
             d = json.loads(extra_files['config.txt'])  # extra_files dict
             stride, names = int(d['stride']), d['names']
         elif pt:  # PyTorch
-            from models.experimental import attempt_load  # scoped to avoid circular import
             model = attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
         elif coreml:  # CoreML
+            LOGGER.info(f'Loading {w} for CoreML inference...')
             import coremltools as ct
             model = ct.models.MLModel(w)
         elif dnn:  # ONNX OpenCV DNN
@@ -316,7 +320,7 @@ class DetectMultiBackend(nn.Module):
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
             LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
-            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+            check_requirements(('onnx', 'onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime'))
             import onnxruntime
             session = onnxruntime.InferenceSession(w, None)
         elif engine:  # TensorRT
@@ -376,7 +380,7 @@ class DetectMultiBackend(nn.Module):
         if self.pt:  # PyTorch
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
-        elif self.coreml:  # CoreML *.mlmodel
+        elif self.coreml:  # CoreML
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             im = Image.fromarray((im[0] * 255).astype('uint8'))
             # im = im.resize((192, 320), Image.ANTIALIAS)
@@ -433,24 +437,28 @@ class AutoShape(nn.Module):
     # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
     iou = 0.45  # NMS IoU threshold
-    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
+    agnostic = False  # NMS class-agnostic
     multi_label = False  # NMS multiple labels per box
+    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
     max_det = 1000  # maximum number of detections per image
 
     def __init__(self, model):
         super().__init__()
         LOGGER.info('Adding AutoShape... ')
         copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
+        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
+        self.pt = not self.dmb or model.pt  # PyTorch model
         self.model = model.eval()
 
     def _apply(self, fn):
         # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
         self = super()._apply(fn)
-        m = self.model.model[-1]  # Detect()
-        m.stride = fn(m.stride)
-        m.grid = list(map(fn, m.grid))
-        if isinstance(m.anchor_grid, list):
-            m.anchor_grid = list(map(fn, m.anchor_grid))
+        if self.pt:
+            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
+            m.stride = fn(m.stride)
+            m.grid = list(map(fn, m.grid))
+            if isinstance(m.anchor_grid, list):
+                m.anchor_grid = list(map(fn, m.anchor_grid))
         return self
 
     @torch.no_grad()
@@ -465,7 +473,7 @@ class AutoShape(nn.Module):
         #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
 
         t = [time_sync()]
-        p = next(self.model.parameters())  # for device and type
+        p = next(self.model.parameters()) if self.pt else torch.zeros(1)  # for device and type
         if isinstance(imgs, torch.Tensor):  # torch
             with amp.autocast(enabled=p.device.type != 'cpu'):
                 return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
@@ -489,8 +497,8 @@ class AutoShape(nn.Module):
             g = (size / max(s))  # gain
             shape1.append([y * g for y in s])
             imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
-        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
-        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
+        shape1 = [make_divisible(x, self.stride) for x in np.stack(shape1, 0).max(0)]  # inference shape
+        x = [letterbox(im, new_shape=shape1 if self.pt else size, auto=False)[0] for im in imgs]  # pad
         x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
         x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
         x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32
@@ -498,12 +506,12 @@ class AutoShape(nn.Module):
 
         with amp.autocast(enabled=p.device.type != 'cpu'):
             # Inference
-            y = self.model(x, augment, profile)[0]  # forward
+            y = self.model(x, augment, profile)  # forward
             t.append(time_sync())
 
             # Post-process
-            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes,
-                                    multi_label=self.multi_label, max_det=self.max_det)  # NMS
+            y = non_max_suppression(y if self.dmb else y[0], self.conf, iou_thres=self.iou, classes=self.classes,
+                                    agnostic=self.agnostic, multi_label=self.multi_label, max_det=self.max_det)  # NMS
             for i in range(n):
                 scale_coords(shape1, y[i][:, :4], shape0[i])
 
utils/general.py CHANGED
@@ -455,7 +455,9 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
 
 
 def make_divisible(x, divisor):
-    # Returns x evenly divisible by divisor
+    # Returns nearest x divisible by divisor
+    if isinstance(divisor, torch.Tensor):
+        divisor = int(divisor.max())  # to int
     return math.ceil(x / divisor) * divisor
 
 
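
For reference, a small self-contained check of the new behavior: `make_divisible()` now accepts a tensor divisor (e.g. a model stride tensor) and reduces it to its max as an int, which is what lets AutoShape pass `self.stride` through directly in the change above:

```python
import math

import torch

def make_divisible(x, divisor):
    # Returns nearest x divisible by divisor
    if isinstance(divisor, torch.Tensor):
        divisor = int(divisor.max())  # to int
    return math.ceil(x / divisor) * divisor

print(make_divisible(641, 32))                            # 672
print(make_divisible(641, torch.tensor([8., 16., 32.])))  # 672, same result from a stride tensor
```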