Spaces:

xiang-wuu
/

yolov5

Runtime error

yxNONG committed on Jun 30, 2020

Commit

cdb9bde

unverified ·

1 Parent(s): bfd51f6

Unify the checkpoint of single- and multi-GPU training

Save model.hyp etc. to the checkpoint when using multi-GPU training

Files changed (1) hide show

train.py CHANGED Viewed

@@ -79,7 +79,7 @@ def train(hyp):
     # Create model
     model = Model(opt.cfg).to(device)
     assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
-    model.names = data_dict['names']
     # Image sizes
     gs = int(max(model.stride))  # grid size (max stride)
@@ -172,6 +172,7 @@ def train(hyp):
     model.hyp = hyp  # attach hyperparameters to model
     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
     # Class frequency
     labels = np.concatenate(dataset.labels, 0)
@@ -314,6 +315,14 @@ def train(hyp):
         # Save model
         save = (not opt.nosave) or (final_epoch and not opt.evolve)
         if save:
             with open(results_file, 'r') as f:  # create checkpoint
                 ckpt = {'epoch': epoch,
                         'best_fitness': best_fitness,

     # Create model
     model = Model(opt.cfg).to(device)
     assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
     # Image sizes
     gs = int(max(model.stride))  # grid size (max stride)
     model.hyp = hyp  # attach hyperparameters to model
     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
+    model.names = data_dict['names']
     # Class frequency
     labels = np.concatenate(dataset.labels, 0)
         # Save model
         save = (not opt.nosave) or (final_epoch and not opt.evolve)
         if save:
+            if hasattr(model, 'module'):
+                # Duplicate Model parameters for Multi-GPU save
+                ema.ema.module.nc = model.nc  # attach number of classes to model
+                ema.ema.module.hyp = model.hyp  # attach hyperparameters to model
+                ema.ema.module.gr = model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
+                ema.ema.module.class_weights = model.class_weights # attach class weights
+                ema.ema.module.names = data_dict['names']
             with open(results_file, 'r') as f:  # create checkpoint
                 ckpt = {'epoch': epoch,
                         'best_fitness': best_fitness,