import torch
import torch.nn as nn
from torch.nn import init
import functools
from torch.optim import lr_scheduler

from util.util import to_device, load_network


def init_weights(net, init_type='normal', init_gain=0.02):
    """Initialize network weights.

    Parameters:
        net (network)     -- network to be initialized
        init_type (str)   -- the name of an initialization method: N02 | glorot | xavier | kaiming | ortho.
                             Any other value is treated as the name of a saved model to restore via load_network.
        init_gain (float) -- scaling factor for N02, xavier and orthogonal.

    'N02' (a Gaussian with std 0.02) is the scheme used in the original pix2pix and CycleGAN
    paper, but xavier and kaiming might work better for some applications. Feel free to try yourself.
    """
    def init_func(m):  # applied to every submodule by net.apply()
        # Only layers with a learnable weight matrix are (re-)initialized.
        if (isinstance(m, nn.Conv2d)
                or isinstance(m, nn.Linear)
                or isinstance(m, nn.Embedding)):
            if init_type == 'N02':
                init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type in ['glorot', 'xavier']:
                init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'ortho':
                init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)

    if init_type in ['N02', 'glorot', 'xavier', 'kaiming', 'ortho']:
        print('initialize network with %s' % init_type)
        net.apply(init_func)
    else:
        print('loading the model from %s' % init_type)
        net = load_network(net, init_type, 'latest')
    return net

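
# Minimal usage sketch (hedged): `netG` is a placeholder for any nn.Module built elsewhere,
# and 'my_run' is a hypothetical saved-model name, not something defined in this file.
#   netG = init_weights(netG, init_type='N02', init_gain=0.02)  # fresh Gaussian init
#   netG = init_weights(netG, init_type='my_run')               # falls through to load_network(net, 'my_run', 'latest')
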
def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]):
    """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights

    Parameters:
        net (network)      -- the network to be initialized
        init_type (str)    -- the name of an initialization method: N02 | glorot | xavier | kaiming | ortho
        init_gain (float)  -- scaling factor for N02, xavier and orthogonal.
        gpu_ids (int list) -- which GPUs the network runs on: e.g., [0, 1, 2]

    Return an initialized network.
    """
    if len(gpu_ids) > 0:
        assert torch.cuda.is_available()
        net.to(gpu_ids[0])                         # move to the first listed GPU
        net = torch.nn.DataParallel(net, gpu_ids)  # replicate across all listed GPUs
    net = init_weights(net, init_type, init_gain=init_gain)
    return net

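
# Hedged example of a typical call (assumes two visible CUDA devices; `netD` stands in
# for any nn.Module defined elsewhere in the project):
#   netD = init_net(netD, init_type='N02', init_gain=0.02, gpu_ids=[0, 1])
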
def get_scheduler(optimizer, opt):
    """Return a learning rate scheduler

    Parameters:
        optimizer          -- the optimizer of the network
        opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.
                              opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine

    For 'linear', we keep the same learning rate for the first <opt.niter> epochs
    and linearly decay the rate to zero over the next <opt.niter_decay> epochs.
    For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers.
    See https://pytorch.org/docs/stable/optim.html for more details.
    """
    if opt.lr_policy == 'linear':
        def lambda_rule(epoch):
            # Multiplier applied to the initial LR: 1.0 during the first opt.niter epochs,
            # then a linear ramp down to 0 over the following opt.niter_decay epochs.
            lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
    elif opt.lr_policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.niter, eta_min=0)
    else:
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
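
# Worked example of the 'linear' rule (illustrative values, not from any shipped config):
# with opt.niter = 100, opt.niter_decay = 100, opt.epoch_count = 1 the multiplier is
#   epoch  99 -> 1.0 - max(0, 99 + 1 - 100) / 101 = 1.0    (still in the flat phase)
#   epoch 150 -> 1.0 - (150 + 1 - 100) / 101     ~ 0.495   (about halfway through the decay)
#   epoch 200 -> 1.0 - (200 + 1 - 100) / 101     = 0.0     (fully decayed)
# In the training loop, scheduler.step() is expected to be called once per epoch.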