glenn-jocher committed
Commit 245d645 · unverified · 1 parent: 32661f7

Add callbacks (#7315)

* Add `on_train_start()` callback

* Update

* Update

Files changed (3)
  1. train.py +4 -0
  2. utils/loggers/__init__.py +4 -0
  3. val.py +4 -0
train.py CHANGED

@@ -66,6 +66,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
     save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
         Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
         opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
+    callbacks.run('on_pretrain_routine_start')
 
     # Directories
     w = save_dir / 'weights'  # weights dir
@@ -291,11 +292,13 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
     scaler = amp.GradScaler(enabled=cuda)
     stopper = EarlyStopping(patience=opt.patience)
     compute_loss = ComputeLoss(model)  # init loss class
+    callbacks.run('on_train_start')
     LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                 f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                 f"Logging results to {colorstr('bold', save_dir)}\n"
                 f'Starting training for {epochs} epochs...')
     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        callbacks.run('on_train_epoch_start')
         model.train()
 
         # Update image weights (optional, single-GPU only)
@@ -317,6 +320,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
             pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
         optimizer.zero_grad()
         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
+            callbacks.run('on_train_batch_start')
             ni = i + nb * epoch  # number integrated batches (since train start)
             imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
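The four new `callbacks.run()` calls notify registered handlers at the start of the pre-train routine, the training run, each epoch, and each batch. Below is a minimal sketch (not part of this commit) of attaching a custom handler to one of these hooks; it assumes the `Callbacks.register_action()` / `Callbacks.run()` API in `utils/callbacks.py`, and the handler itself is hypothetical.

```python
# Minimal sketch, assuming utils/callbacks.py exposes Callbacks with
# register_action(hook, name, callback) and run(hook); the handler is hypothetical.
from utils.callbacks import Callbacks

def announce_train_start():
    # Fires when train.py reaches callbacks.run('on_train_start')
    print('train() is entering the epoch loop')

callbacks = Callbacks()
callbacks.register_action('on_train_start', name='announce_train_start', callback=announce_train_start)

# Passing this object to train.train(hyp, opt, device, callbacks) would trigger the
# handler during training; running the hook directly demonstrates the dispatch:
callbacks.run('on_train_start')
```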
 
utils/loggers/__init__.py CHANGED

@@ -84,6 +84,10 @@ class Loggers():
         else:
             self.wandb = None
 
+    def on_train_start(self):
+        # Callback runs on train start
+        pass
+
     def on_pretrain_routine_end(self):
         # Callback runs on pre-train routine end
         paths = self.save_dir.glob('*labels*.jpg')  # training labels
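The new `on_train_start()` method is a deliberate no-op: it gives the `on_train_start` hook a matching `Loggers` entry point that individual loggers can fill in later. The self-contained sketch below illustrates the dispatch pattern with stand-in classes; it is an illustration of the idea, not the repo's actual wiring.

```python
# Stand-in classes illustrating how an empty logger method is still a safe hook target.
class MiniCallbacks:
    def __init__(self):
        self._callbacks = {'on_train_start': [], 'on_pretrain_routine_end': []}

    def register_action(self, hook, name='', callback=None):
        # Store the handler under its hook name
        self._callbacks[hook].append({'name': name, 'callback': callback})

    def run(self, hook, *args, **kwargs):
        # Invoke every handler registered for this hook
        for entry in self._callbacks[hook]:
            entry['callback'](*args, **kwargs)

class MiniLoggers:
    def on_train_start(self):
        # Mirrors the new stub in utils/loggers/__init__.py: nothing to do yet
        pass

    def on_pretrain_routine_end(self):
        print('pre-train routine finished')

callbacks, loggers = MiniCallbacks(), MiniLoggers()
for k in ('on_train_start', 'on_pretrain_routine_end'):
    callbacks.register_action(k, name=k, callback=getattr(loggers, k))

callbacks.run('on_train_start')           # no-op stub, nothing happens
callbacks.run('on_pretrain_routine_end')  # prints the message
```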
val.py CHANGED

@@ -188,8 +188,10 @@ def run(
     dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
+    callbacks.run('on_val_start')
     pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
     for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
+        callbacks.run('on_val_batch_start')
         t1 = time_sync()
         if cuda:
             im = im.to(device, non_blocking=True)
@@ -260,6 +262,8 @@ def run(
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
             Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
+        callbacks.run('on_val_batch_end')
+
     # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
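Validation gets the same treatment: `on_val_start` marks the start of the run, while `on_val_batch_start` and `on_val_batch_end` bracket each batch. The sketch below (not from this commit) attaches a simple batch counter, assuming `val.run()` accepts a `callbacks` argument as elsewhere in the repo; the dataset and weights values are placeholders.

```python
# Illustrative sketch: count validated batches via the new on_val_batch_end hook.
# Assumes val.run(..., callbacks=...) and utils.callbacks.Callbacks; the data/weights
# arguments are placeholders, not values prescribed by this commit.
import val
from utils.callbacks import Callbacks

batches = {'n': 0}

def count_batch():
    # Fires once per iteration of the validation dataloader loop
    batches['n'] += 1

callbacks = Callbacks()
callbacks.register_action('on_val_batch_end', name='count_batch', callback=count_batch)

val.run(data='coco128.yaml', weights='yolov5s.pt', callbacks=callbacks)
print(f"on_val_batch_end fired {batches['n']} times")
```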