Update train.py
train.py
@@ -79,7 +79,6 @@ def train(hyp):
     # Create model
     model = Model(opt.cfg).to(device)
     assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
-

     # Image sizes
     gs = int(max(model.stride))  # grid size (max stride)
@@ -133,7 +132,13 @@ def train(hyp):
             with open(results_file, 'w') as file:
                 file.write(ckpt['training_results'])  # write results.txt

+        # epochs
         start_epoch = ckpt['epoch'] + 1
+        if epochs < start_epoch:
+            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
+                  (opt.weights, ckpt['epoch'], epochs))
+            epochs += ckpt['epoch']  # finetune additional epochs
+
         del ckpt

     # Mixed precision training https://github.com/NVIDIA/apex
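This hunk makes a resumed run treat --epochs as a count of additional epochs when the checkpoint has already trained past the requested total. A minimal standalone sketch of that arithmetic, using hypothetical numbers and a hypothetical weights path:

# Sketch of the "finetune additional epochs" logic added above (values are made up).
ckpt = {'epoch': 299}  # checkpoint that already completed epoch 299
epochs = 50            # value requested via --epochs

start_epoch = ckpt['epoch'] + 1  # 300
if epochs < start_epoch:         # 50 < 300, so interpret --epochs as *additional* epochs
    print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
          ('weights/last.pt', ckpt['epoch'], epochs))  # hypothetical weights path
    epochs += ckpt['epoch']      # 50 + 299 = 349

print(start_epoch, epochs)       # 300 349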
@@ -147,6 +152,15 @@ def train(hyp):
     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
     # plot_lr_scheduler(optimizer, scheduler, epochs)

+    # Initialize distributed training
+    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
+        dist.init_process_group(backend='nccl',  # distributed backend
+                                init_method='tcp://127.0.0.1:9999',  # init method
+                                world_size=1,  # number of nodes
+                                rank=0)  # node rank
+        model = torch.nn.parallel.DistributedDataParallel(model)
+        # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
+
     # Trainloader
     dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                             hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
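A later hunk (@@ -164,15 +178,6 @@) deletes this same block from its old location after model.names, so the net effect of the change is to run distributed initialization before the dataloaders are created. For reference, a minimal self-contained sketch of this single-process setup might look like the following; it assumes a multi-GPU machine with a CUDA build of PyTorch, and the model is a stand-in for Model(opt.cfg).to(device):

# Minimal sketch of the single-node DistributedDataParallel setup used above.
import torch
import torch.distributed as dist
import torch.nn as nn

model = nn.Linear(10, 2).cuda()  # stand-in for the real detection model

if torch.cuda.device_count() > 1 and torch.distributed.is_available():
    dist.init_process_group(backend='nccl',                     # GPU collective backend
                            init_method='tcp://127.0.0.1:9999',  # rendezvous address (placeholder)
                            world_size=1,                        # one process in the job
                            rank=0)                              # this process's rank
    model = torch.nn.parallel.DistributedDataParallel(model)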
@@ -155,7 +169,7 @@ def train(hyp):

     # Testloader
     testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
-
+                                   hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]

     # Model parameters
     hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
@@ -164,15 +178,6 @@ def train(hyp):
     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
     model.names = data_dict['names']
-
-    # Initialize distributed training
-    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
-        dist.init_process_group(backend='nccl',  # distributed backend
-                                init_method='tcp://127.0.0.1:9999',  # init method
-                                world_size=1,  # number of nodes
-                                rank=0)  # node rank
-        model = torch.nn.parallel.DistributedDataParallel(model)
-        # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html

     # Class frequency
     labels = np.concatenate(dataset.labels, 0)
@@ -373,7 +378,7 @@ if __name__ == '__main__':
     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%')
     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
     opt = parser.parse_args()
-    opt.weights = last if opt.resume else opt.weights
+    opt.weights = last if opt.resume and not opt.weights else opt.weights
     opt.cfg = check_file(opt.cfg)  # check file
     opt.data = check_file(opt.data)  # check file
     print(opt)
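The final hunk changes how --resume interacts with --weights: previously resuming always overwrote opt.weights with the last checkpoint path, whereas now an explicitly supplied --weights takes precedence and last is only a fallback. A small sketch with hypothetical paths, using argparse.Namespace as a stand-in for the parsed options:

# Illustration of the changed --resume / --weights precedence (hypothetical values).
from argparse import Namespace

last = 'weights/last.pt'  # hypothetical path of the most recent checkpoint

# Case 1: resume with an explicit --weights -> keep the user's choice
opt = Namespace(resume=True, weights='weights/custom.pt')
opt.weights = last if opt.resume and not opt.weights else opt.weights
print(opt.weights)  # weights/custom.pt

# Case 2: resume with no --weights given -> fall back to the last checkpoint
opt = Namespace(resume=True, weights='')
opt.weights = last if opt.resume and not opt.weights else opt.weights
print(opt.weights)  # weights/last.pt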