Add logic for resuming a run and picking up that run's hyp.yaml when resuming
Browse files
train.py
CHANGED
@@ -63,7 +63,7 @@ def train(hyp):
|
|
63 |
os.makedirs(wdir, exist_ok=True)
|
64 |
last = wdir + 'last.pt'
|
65 |
best = wdir + 'best.pt'
|
66 |
-
results_file = 'results.txt'
|
67 |
|
68 |
epochs = opt.epochs # 300
|
69 |
batch_size = opt.batch_size # 64
|
@@ -360,7 +360,7 @@ def train(hyp):
|
|
360 |
if len(n):
|
361 |
n = '_' + n if not n.isnumeric() else n
|
362 |
fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
|
363 |
-
for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
|
364 |
if os.path.exists(f1):
|
365 |
os.rename(f1, f2) # rename
|
366 |
ispt = f2.endswith('.pt') # is *.pt
|
@@ -382,10 +382,10 @@ if __name__ == '__main__':
|
|
382 |
parser.add_argument('--batch-size', type=int, default=16)
|
383 |
parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='*.cfg path')
|
384 |
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
|
385 |
-
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes')
|
386 |
parser.add_argument('--rect', action='store_true', help='rectangular training')
|
387 |
parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
|
388 |
-
parser.add_argument('--
|
389 |
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
|
390 |
parser.add_argument('--notest', action='store_true', help='only test final epoch')
|
391 |
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
|
@@ -397,18 +397,30 @@ if __name__ == '__main__':
|
|
397 |
parser.add_argument('--adam', action='store_true', help='use adam optimizer')
|
398 |
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%')
|
399 |
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
|
400 |
-
parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file')
|
401 |
opt = parser.parse_args()
|
402 |
|
403 |
-
if
|
|
|
|
|
|
|
|
|
|
|
404 |
last = get_latest_run()
|
405 |
print(f'WARNING: No run provided to resume from. Resuming from most recent run found at {last}')
|
406 |
else:
|
407 |
-
last =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
opt.weights = last if opt.resume else opt.weights
|
409 |
opt.cfg = check_file(opt.cfg) # check file
|
410 |
opt.data = check_file(opt.data) # check file
|
411 |
-
opt.hyp = check_file(opt.hyp) #check file
|
412 |
print(opt)
|
413 |
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
|
414 |
device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
|
|
|
63 |
os.makedirs(wdir, exist_ok=True)
|
64 |
last = wdir + 'last.pt'
|
65 |
best = wdir + 'best.pt'
|
66 |
+
results_file = wdir + 'results.txt'
|
67 |
|
68 |
epochs = opt.epochs # 300
|
69 |
batch_size = opt.batch_size # 64
|
|
|
360 |
if len(n):
|
361 |
n = '_' + n if not n.isnumeric() else n
|
362 |
fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
|
363 |
+
for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', wdir + 'results.txt'], [flast, fbest, fresults]):
|
364 |
if os.path.exists(f1):
|
365 |
os.rename(f1, f2) # rename
|
366 |
ispt = f2.endswith('.pt') # is *.pt
|
|
|
382 |
parser.add_argument('--batch-size', type=int, default=16)
|
383 |
parser.add_argument('--cfg', type=str, default='models/yolov5s.yaml', help='*.cfg path')
|
384 |
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
|
385 |
+
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='train,test sizes. Assumes square imgs.')
|
386 |
parser.add_argument('--rect', action='store_true', help='rectangular training')
|
387 |
parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
|
388 |
+
parser.add_argument('--resume-from-run', type=str, default='', help='resume training from last.pt in this dir')
|
389 |
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
|
390 |
parser.add_argument('--notest', action='store_true', help='only test final epoch')
|
391 |
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
|
|
|
397 |
parser.add_argument('--adam', action='store_true', help='use adam optimizer')
|
398 |
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%')
|
399 |
parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
|
400 |
+
parser.add_argument('--hyp', type=str, default='', help ='path to hyp yaml file. Not needed with --resume.')
|
401 |
opt = parser.parse_args()
|
402 |
|
403 |
+
# Resume logic: --resume-from-run resumes from the given path; --resume alone resumes from the most recent run
|
404 |
+
# Note: if neither --resume nor --resume-from-run is given, last is set to an empty string
|
405 |
+
if opt.resume_from_run:
|
406 |
+
opt.resume = True
|
407 |
+
last = opt.resume_from_run
|
408 |
+
elif opt.resume and not opt.resume_from_run:
|
409 |
last = get_latest_run()
|
410 |
print(f'WARNING: No run provided to resume from. Resuming from most recent run found at {last}')
|
411 |
else:
|
412 |
+
last = ''
|
413 |
+
|
414 |
+
# if resuming, check for hyp file
|
415 |
+
if last:
|
416 |
+
last_hyp = last.replace('last.pt', 'hyp.yaml')
|
417 |
+
if os.path.exists(last_hyp):
|
418 |
+
opt.hyp = last_hyp
|
419 |
+
|
420 |
opt.weights = last if opt.resume else opt.weights
|
421 |
opt.cfg = check_file(opt.cfg) # check file
|
422 |
opt.data = check_file(opt.data) # check file
|
423 |
+
opt.hyp = check_file(opt.hyp) if opt.hyp else '' #check file
|
424 |
print(opt)
|
425 |
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
|
426 |
device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
|