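# Training configuration (OmegaConf-style YAML; note the ${...} interpolations
# in the ``train`` section) for latent-diffusion face restoration. Each
# ``target`` entry names the Python class or factory to instantiate, and
# ``params`` are forwarded to it as keyword arguments.
#
# Trainer: TrainerDifIRLPIPS -- presumably a diffusion-based image-restoration
# trainer that adds an LPIPS perceptual loss on top of the MSE objective
# (see ``train.loss_coef`` near the bottom of this file).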
trainer:
  target: trainer.TrainerDifIRLPIPS
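
# Denoising backbone: a U-Net with Swin-Transformer blocks (UNetModelSwin).
# It operates on the 8-channel 64x64 latents produced by the VQ autoencoder
# below and, via cond_lq / lq_size, is additionally conditioned on the 512-px
# low-quality input.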
model:
  target: models.unet.UNetModelSwin
  ckpt_path: ~
  params:
    image_size: 64
    in_channels: 8
    model_channels: 160
    out_channels: 8
    attention_resolutions: [64, 32, 16, 8]
    dropout: 0
    channel_mult: [1, 2, 2, 4]
    num_res_blocks: [2, 2, 2, 2]
    conv_resample: True
    dims: 2
    use_fp16: False
    num_head_channels: 32
    use_scale_shift_norm: True
    resblock_updown: False
    swin_depth: 2
    swin_embed_dim: 192
    window_size: 8
    mlp_ratio: 4
    cond_lq: True
    lq_size: 512
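
# Diffusion process: built by create_gaussian_diffusion with only 4 steps and
# an exponential noise schedule (power 0.3, etas_end 0.99, min_noise_level 0.2,
# kappa 2.0). The network predicts x0 directly (predict_type: xstart) and the
# process runs in the autoencoder's latent space (latent_flag: True); sf: 1,
# i.e. no additional super-resolution scale factor.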
diffusion:
  target: models.script_util.create_gaussian_diffusion
  params:
    sf: 1
    schedule_name: exponential
    schedule_kwargs:
      power: 0.3
    etas_end: 0.99
    steps: 4
    min_noise_level: 0.2
    kappa: 2.0
    weighted_mse: False
    predict_type: xstart
    timestep_respacing: ~
    scale_factor: 1.0
    normalize_input: True
    latent_flag: True
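
# Latent autoencoder: an f8 VQ model (ch_mult [1, 2, 4, 8]) mapping 512x512 RGB
# images to 64x64x8 latents, matching the UNet's image_size / in_channels
# above. Weights are loaded from the FFHQ-pretrained checkpoint and run in
# fp16.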
autoencoder:
  target: ldm.models.autoencoder.VQModelTorch
  ckpt_path: weights/ffhq512_vq_f8_dim8_face.pth
  use_fp16: True
  params:
    embed_dim: 8
    n_embed: 4096
    ddconfig:
      double_z: False
      z_channels: 8
      resolution: 512
      in_channels: 3
      out_ch: 3
      ch: 64
      ch_mult:
        - 1
        - 2
        - 4
        - 8
      num_res_blocks:
        - 1
        - 2
        - 3
        - 4
      attn_resolutions: []
      dropout: 0.0
      padding_mode: zeros
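
# Training data: FFHQ 512x512 with a GFPGAN-style synthetic degradation
# pipeline (iso/aniso Gaussian blur, random 0.8x-32x downsampling, Gaussian
# noise in [0, 20], JPEG compression with quality in [30, 100], and a 1%
# chance of grayscale conversion) to generate LQ/HQ training pairs.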
data:
  train:
    type: gfpgan
    params:
      dir_path: /mnt/sfs-common/zsyue/database/FFHQ/images512x512
      im_exts: png
      io_backend:
        type: disk
      use_hflip: true
      mean: [0.5, 0.5, 0.5]
      std: [0.5, 0.5, 0.5]
      out_size: 512
      blur_kernel_size: 41
      kernel_list: ['iso', 'aniso']
      kernel_prob: [0.5, 0.5]
      blur_sigma: [0.1, 15]
      downsample_range: [0.8, 32]
      noise_range: [0, 20]
      jpeg_range: [30, 100]
      color_jitter_prob: ~
      color_jitter_pt_prob: ~
      gray_prob: 0.01
      gt_gray: True
      need_gt_path: False
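  # Validation data: cropped low-quality test faces read from disk and
  # normalized with mean/std 0.5 (presumably mapping inputs to [-1, 1]).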
  val:
    type: base
    params:
      dir_path: testdata/faceir/cropped_faces/lq
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      im_exts: png
      need_path: False
      recursive: False
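
# Training schedule: 400k iterations, lr 5e-5 annealed ("cosin" schedule) to
# 2e-5 after a 5k-iteration warmup, EMA rate 0.999, mixed-precision (AMP)
# enabled. batch: [56, 8] is presumably [training, validation] batch size, and
# microbatch: 7 presumably splits each training batch into gradient-accumulation
# chunks of 7. Loss weights: 1.0 (MSE) and 10.0 (LPIPS).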
train:
  # learning rate
  lr: 5e-5 # learning rate
  lr_min: 2e-5
  lr_schedule: cosin
  warmup_iterations: 5000
  # dataloader
  batch: [56, 8]
  microbatch: 7
  num_workers: 6
  prefetch_factor: 2
  # optimization settings
  weight_decay: 0
  ema_rate: 0.999
  iterations: 400000 # total iterations
  # save logging
  save_freq: 10000
  log_freq: [200, 2000, 1] # [training loss, training images, val images]
  loss_coef: [1.0, 10.0] # [mse, lpips]
  local_logging: True # manually save images
  tf_logging: False # tensorboard logging
  # validation settings
  use_ema_val: True
  val_freq: ${train.save_freq}
  val_y_channel: True
  val_resolution: ${model.params.lq_size}
  val_padding_mode: reflect
  # training settings
  use_amp: True # amp training
  seed: 123456 # random seed
  global_seeding: False
  # model compile
  compile:
    flag: False
    mode: reduce-overhead # options: default, reduce-overhead
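
# Minimal sketch (kept as a comment so the YAML stays valid) of how a
# ``target``/``params`` config like this one is typically instantiated.
# The helper name ``instantiate_from_config`` and the config file path are
# assumptions for illustration, not this repository's documented API:
#
#   import importlib
#   from omegaconf import OmegaConf
#
#   def instantiate_from_config(cfg):
#       """Import cfg.target ('pkg.module.Class') and call it with cfg.params."""
#       module, cls = cfg.target.rsplit('.', 1)
#       return getattr(importlib.import_module(module), cls)(**cfg.get('params', {}))
#
#   configs = OmegaConf.load('configs/training/this_file.yaml')  # placeholder path
#   model = instantiate_from_config(configs.model)               # UNetModelSwin
#   autoencoder = instantiate_from_config(configs.autoencoder)   # VQModelTorch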