trainer:
  target: trainer.TrainerDifIRLPIPS
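
# Denoiser backbone: a Swin-Transformer-augmented UNet operating on the 8-channel,
# 64x64 VQ latents (matching the autoencoder's embed_dim/z_channels below);
# cond_lq/lq_size presumably supply the 512x512 low-quality face as conditioning.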
model:
  target: models.unet.UNetModelSwin
  ckpt_path: ~
  params:
    image_size: 64
    in_channels: 8
    model_channels: 160
    out_channels: 8
    attention_resolutions: [64,32,16,8]
    dropout: 0
    channel_mult: [1, 2, 2, 4]
    num_res_blocks: [2, 2, 2, 2]
    conv_resample: True
    dims: 2
    use_fp16: False
    num_head_channels: 32
    use_scale_shift_norm: True
    resblock_updown: False
    swin_depth: 2
    swin_embed_dim: 192
    window_size: 8
    mlp_ratio: 4
    cond_lq: True
    lq_size: 512
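
# Diffusion settings: a short 4-step process whose noise schedule presumably
# interpolates exponentially (exponent power = 0.3) between min_noise_level and
# etas_end, with kappa scaling the injected noise; the network predicts x0
# directly (predict_type: xstart) and latent_flag indicates the diffusion runs
# on autoencoder latents rather than pixels. sf is presumably the upscaling
# factor, 1 here since restoration keeps the 512x512 resolution.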
diffusion:
  target: models.script_util.create_gaussian_diffusion
  params:
    sf: 1
    schedule_name: exponential
    schedule_kwargs:
      power: 0.3
    etas_end: 0.99
    steps: 4
    min_noise_level: 0.2
    kappa: 2.0
    weighted_mse: False
    predict_type: xstart
    timestep_respacing: ~
    scale_factor: 1.0
    normalize_input: True
    latent_flag: True
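
# VQ autoencoder (f = 8: 512x512 RGB images map to 64x64 latents with 8 channels,
# 4096-entry codebook); weights are loaded from an FFHQ-pretrained checkpoint and
# presumably kept fixed while the diffusion model is trained in its latent space.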
autoencoder:
  target: ldm.models.autoencoder.VQModelTorch
  ckpt_path: weights/ffhq512_vq_f8_dim8_face.pth
  use_fp16: True
  params:
    embed_dim: 8
    n_embed: 4096
    ddconfig:
      double_z: False
      z_channels: 8
      resolution: 512
      in_channels: 3
      out_ch: 3
      ch: 64
      ch_mult:
      - 1
      - 2
      - 4
      - 8
      num_res_blocks:
      - 1
      - 2
      - 3
      - 4
      attn_resolutions: []
      dropout: 0.0
      padding_mode: zeros
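
# Training data: FFHQ 512x512 ground-truth faces, degraded on the fly with a
# GFPGAN-style pipeline (random iso/aniso Gaussian blur, downsampling by a factor
# drawn from downsample_range, Gaussian noise, JPEG compression, and occasional
# grayscale conversion) to synthesize the low-quality inputs.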
data:
  train:
    type: gfpgan
    params:
      dir_path: /mnt/sfs-common/zsyue/database/FFHQ/images512x512
      im_exts: png
      io_backend:
        type: disk
      use_hflip: true
      mean: [0.5, 0.5, 0.5]
      std: [0.5, 0.5, 0.5]
      out_size: 512
      blur_kernel_size: 41
      kernel_list: ['iso', 'aniso']
      kernel_prob: [0.5, 0.5]
      blur_sigma: [0.1, 15]
      downsample_range: [0.8, 32]
      noise_range: [0, 20]
      jpeg_range: [30, 100]
      color_jitter_prob: ~
      color_jitter_pt_prob: ~
      gray_prob: 0.01
      gt_gray: True
      need_gt_path: False
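  # Validation data: real low-quality face crops without ground truth,
  # normalized to [-1, 1] via mean/std of 0.5.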
  val:
    type: base
    params:
      dir_path: testdata/faceir/cropped_faces/lq
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      im_exts: png
      need_path: False
      recursive: False
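
# Optimization, logging, and validation settings: cosine learning-rate decay
# from 5e-5 to 2e-5 with 5k warmup iterations over 400k total iterations,
# EMA weight averaging (rate 0.999), and mixed-precision (AMP) training.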
train:
  # learning rate
  lr: 5e-5
  lr_min: 2e-5
  lr_schedule: cosin
  warmup_iterations: 5000
  # dataloader
  batch: [56, 8]
  microbatch: 7
  num_workers: 6
  prefetch_factor: 2
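  # Note: batch is presumably [training batch size, validation batch size]; the
  # training batch of 56 is split into microbatches of 7, i.e. 8 gradient-accumulation
  # steps per optimizer update.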
  # optimization settings
  weight_decay: 0
  ema_rate: 0.999
  iterations: 400000            # total iterations
  # save logging
  save_freq: 10000
  log_freq: [200, 2000, 1]      # [training loss, training images, val images]
  loss_coef: [1.0, 10.0]        # [mse, lpips]
  local_logging: True           # manually save images
  tf_logging: False             # tensorboard logging
  # validation settings
  use_ema_val: True
  val_freq: ${train.save_freq}
  val_y_channel: True
  val_resolution: ${model.params.lq_size}
  val_padding_mode: reflect
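  # ${train.save_freq} and ${model.params.lq_size} are OmegaConf-style references,
  # resolving to 10000 and 512, so validation runs at every checkpoint save and
  # at the full 512x512 LQ resolution.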
  # training settings
  use_amp: True                 # automatic mixed precision
  seed: 123456                  # random seed
  global_seeding: False
  # model compile
  compile:
    flag: False
    mode: reduce-overhead       # default, reduce-overhead
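
# Usage sketch (assumption, not taken from this repo's code): configs in this style
# are typically loaded with OmegaConf and handed to the class named in trainer.target:
#   from omegaconf import OmegaConf
#   configs = OmegaConf.load('path/to/this_config.yaml')  # hypothetical path
#   print(configs.train.val_freq)  # -> 10000, resolved from ${train.save_freq}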