Commit 16f6834
Parent(s): 1b9e28e
update train.py and experimental.py

Files changed:
- models/experimental.py  +4 -1
- train.py  +10 -12
models/experimental.py

@@ -119,7 +119,10 @@ class Ensemble(nn.ModuleList):
         y = []
         for module in self:
             y.append(module(x, augment)[0])
-
+        # y = torch.stack(y).max(0)[0]  # max ensemble
+        # y = torch.cat(y, 1)  # nms ensemble
+        y = torch.stack(y).mean(0)  # mean ensemble
+        return y, None  # inference, train output
 
 
 def attempt_load(weights, map_location=None):
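For context on the change above, the three combination strategies listed in Ensemble.forward differ only in how the per-model predictions are reduced. The snippet below is a minimal illustration with random stand-in tensors; the (batch, num_predictions, num_outputs) shapes are placeholders for this sketch, not values from the commit:

import torch

# Stand-ins for the outputs collected in Ensemble.forward: one tensor per model,
# each shaped (batch, num_predictions, num_outputs).
y = [torch.rand(2, 100, 85) for _ in range(3)]

y_mean = torch.stack(y).mean(0)   # mean ensemble: element-wise average (enabled by this commit)
y_max = torch.stack(y).max(0)[0]  # max ensemble: element-wise maximum (commented out above)
y_nms = torch.cat(y, 1)           # nms ensemble: concatenate candidates, let NMS de-duplicate later

print(y_mean.shape, y_max.shape)  # torch.Size([2, 100, 85]) for both
print(y_nms.shape)                # torch.Size([2, 300, 85]), 3x the candidates

Note that the mean and max variants require every model in the ensemble to produce identically shaped outputs, while the nms variant tolerates different candidate counts per model as long as the batch and output dimensions match.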
train.py

@@ -101,11 +101,13 @@ def train(hyp):
         optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
     optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
     optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+    print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
+    del pg0, pg1, pg2
+
     # Scheduler https://arxiv.org/pdf/1812.01187.pdf
     lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
-
-    del pg0, pg1, pg2
+    # plot_lr_scheduler(optimizer, scheduler, epochs)
 
     # Load Model
     google_utils.attempt_download(weights)

@@ -147,12 +149,7 @@ def train(hyp):
     if mixed_precision:
         model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
 
-
-    scheduler.last_epoch = start_epoch - 1  # do not move
-    # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
-    # plot_lr_scheduler(optimizer, scheduler, epochs)
-
-    # Initialize distributed training
+    # Distributed training
     if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
         dist.init_process_group(backend='nccl',  # distributed backend
                                 init_method='tcp://127.0.0.1:9999',  # init method

@@ -198,9 +195,10 @@ def train(hyp):
     # Start training
     t0 = time.time()
     nb = len(dataloader)  # number of batches
-
+    nw = max(3 * nb, 1e3)  # number of warmup iterations, max(3 epochs, 1k iterations)
     maps = np.zeros(nc)  # mAP per class
     results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
+    scheduler.last_epoch = start_epoch - 1  # do not move
     print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
     print('Using %g dataloader workers' % dataloader.num_workers)
     print('Starting training for %g epochs...' % epochs)

@@ -225,9 +223,9 @@ def train(hyp):
             ni = i + nb * epoch  # number integrated batches (since train start)
             imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
 
-            #
-            if ni <=
-                xi = [0,
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
                 # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                 accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                 for j, x in enumerate(optimizer.param_groups):
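For reference, the warmup introduced in this commit ramps the gradient-accumulation interval linearly over the first nw iterations via np.interp. A minimal sketch of that interpolation follows, with placeholder values for nb, nbs and batch_size (nbs here stands for the nominal batch size that accumulation targets; all numbers are illustrative, not taken from the commit):

import numpy as np

nb = 500         # batches per epoch (placeholder)
nbs = 64         # nominal batch size to accumulate towards (placeholder)
batch_size = 16  # actual batch size (placeholder)

nw = max(3 * nb, 1e3)  # number of warmup iterations, max(3 epochs, 1k iterations)
xi = [0, nw]           # x interp

for ni in (0, 100, 750, 1500):  # integrated batch counts within the warmup window
    accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
    print(ni, accumulate)  # grows from 1 at ni=0 to nbs / batch_size (4.0) at ni=nw

Because accumulate starts at 1 and only reaches nbs / batch_size at the end of the warmup window, the effective batch size ramps up gradually instead of jumping to its steady-state value on the first iteration.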
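The cosine learning-rate lambda visible in the unchanged context (lf) can be inspected the same way. This small check, with a placeholder epoch count, shows the factor decaying from 1.0 to 0.1 over training:

import math

epochs = 300  # placeholder epoch count
lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine, same form as in train.py

for e in (0, 150, 300):
    print(e, round(lf(e), 3))  # 1.0 at epoch 0, 0.55 at the midpoint, 0.1 at the final epoch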