glenn-jocher committed on
Commit
701e117
·
unverified ·
1 Parent(s): 52c1399

Tensor initialization on device improvements (#6959)

Browse files

* Update common.py speed improvements

Eliminate .to() ops where possible for reduced data transfer overhead. Primarily affects warmup and PyTorch Hub inference.

* Updates

* Updates

* Update detect.py

* Update val.py

Files changed (2) hide show
  1. models/common.py +1 -1
  2. val.py +3 -3
models/common.py CHANGED
@@ -466,7 +466,7 @@ class DetectMultiBackend(nn.Module):
466
  # Warmup model by running inference once
467
  if self.pt or self.jit or self.onnx or self.engine: # warmup types
468
  if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
469
- im = torch.zeros(*imgsz).to(self.device).type(torch.half if self.fp16 else torch.float) # input image
470
  self.forward(im) # warmup
471
 
472
  @staticmethod
 
466
  # Warmup model by running inference once
467
  if self.pt or self.jit or self.onnx or self.engine: # warmup types
468
  if isinstance(self.device, torch.device) and self.device.type != 'cpu': # only warmup GPU models
469
+ im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
470
  self.forward(im) # warmup
471
 
472
  @staticmethod
val.py CHANGED
@@ -87,7 +87,7 @@ def process_batch(detections, labels, iouv):
87
  matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
88
  # matches = matches[matches[:, 2].argsort()[::-1]]
89
  matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
90
- matches = torch.Tensor(matches).to(iouv.device)
91
  correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
92
  return correct
93
 
@@ -155,7 +155,7 @@ def run(data,
155
  cuda = device.type != 'cpu'
156
  is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset
157
  nc = 1 if single_cls else int(data['nc']) # number of classes
158
- iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
159
  niou = iouv.numel()
160
 
161
  # Dataloader
@@ -196,7 +196,7 @@ def run(data,
196
  loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls
197
 
198
  # NMS
199
- targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
200
  lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
201
  t3 = time_sync()
202
  out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
 
87
  matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
88
  # matches = matches[matches[:, 2].argsort()[::-1]]
89
  matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
90
+ matches = torch.from_numpy(matches).to(iouv.device)
91
  correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
92
  return correct
93
 
 
155
  cuda = device.type != 'cpu'
156
  is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset
157
  nc = 1 if single_cls else int(data['nc']) # number of classes
158
+ iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
159
  niou = iouv.numel()
160
 
161
  # Dataloader
 
196
  loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls
197
 
198
  # NMS
199
+ targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
200
  lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
201
  t3 = time_sync()
202
  out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)