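# Training configuration for 256x256 face inpainting in the latent space of a
# pretrained VQ autoencoder (summary inferred from the settings below).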
trainer:
  target: trainer.TrainerDifIRLPIPS
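# The trainer class name suggests optimization with an LPIPS perceptual loss on top of
# the MSE objective, which matches `loss_coef: [1.0, 10.0]  # [mse, lpips]` further down.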
autoencoder:
  target: ldm.models.autoencoder.VQModelTorch
  ckpt_path: weights/celeba256_vq_f4_dim3_face.pth
  use_fp16: True
  params:
    embed_dim: 3
    n_embed: 8192
    ddconfig:
      double_z: False
      z_channels: 3
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
      padding_mode: zeros
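# The VQ autoencoder above is loaded from a pretrained checkpoint and, with two
# downsampling stages (ch_mult of length 3, i.e. the f4 in the checkpoint name), maps
# 256x256 images to a 64x64 latent with embed_dim=3 channels; presumably it stays frozen
# and only provides the encode/decode mapping for the diffusion model, which operates at
# image_size: 64 below.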
model:
  target: models.unet.UNetModelSwin
  ckpt_path: ~
  params:
    image_size: 64
    in_channels: 3
    model_channels: 160
    out_channels: ${autoencoder.params.embed_dim}
    attention_resolutions: [64, 32, 16, 8]
    dropout: 0
    channel_mult: [1, 2, 2, 4]
    num_res_blocks: [2, 2, 2, 2]
    conv_resample: True
    dims: 2
    use_fp16: False
    num_head_channels: 32
    use_scale_shift_norm: True
    resblock_updown: False
    swin_depth: 2
    swin_embed_dim: 192
    window_size: 8
    mlp_ratio: 4
    cond_lq: True
    cond_mask: True
    lq_size: 256
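# cond_lq / cond_mask indicate that the UNet is additionally conditioned on the masked
# (low-quality) input image and its mask at the original 256x256 resolution (lq_size);
# how that conditioning is injected is defined in models.unet.UNetModelSwin, not here.
# `${autoencoder.params.embed_dim}` is an OmegaConf-style interpolation, so out_channels
# resolves to 3.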
diffusion:
  target: models.script_util.create_gaussian_diffusion
  params:
    sf: 1
    schedule_name: exponential
    schedule_kwargs:
      power: 0.3
    etas_end: 0.99
    steps: 4
    min_noise_level: 0.2
    kappa: 2.0
    weighted_mse: False
    predict_type: xstart
    timestep_respacing: ~
    scale_factor: 1.0
    normalize_input: True
    latent_flag: True
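# Only 4 diffusion steps are used, with an exponential noise schedule growing from
# min_noise_level to etas_end (shaped by power); the network predicts x0 directly
# (predict_type: xstart) and latent_flag: True keeps the process in the autoencoder's
# latent space. sf: 1 presumably means no extra rescaling of the input (scale factor 1).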
data:
  train:
    type: inpainting
    params:
      dir_path: ~
      txt_file_path: /mnt/sfs-common/zsyue/database/FFHQ/files_txt/files256.txt
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      need_path: False
      im_exts: png
      recursive: False
      mask_kwargs:
        irregular_proba: 1
        irregular_kwargs:
          max_angle: 4
          max_len: 200
          max_width: 100
          max_times: 5
          min_times: 1
        box_proba: 1
        box_kwargs:
          margin: 10
          bbox_min_size: 30
          bbox_max_size: 150
          max_times: 4
          min_times: 1
        outpainting_proba: 1
        outpainting_kwargs:
          min_padding_percent: 0.04
          max_padding_percent: 0.25
          left_padding_prob: 0.5
          top_padding_prob: 0.5
          right_padding_prob: 0.5
          bottom_padding_prob: 0.5
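  # Training masks are generated on the fly from three families: irregular brush strokes,
  # rectangular boxes, and outpainting-style borders. The *_proba values weight how often
  # each family is drawn and the *_kwargs bound stroke/box size and count; these options
  # mirror LaMa-style mask generators, but the exact sampling logic lives in the dataset
  # code, not in this file.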
  val:
    type: inpainting_val
    params:
      lq_path: testdata/inpainting/face/lq
      gt_path: testdata/inpainting/face/hq
      mask_path: testdata/inpainting/face/mask
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      im_exts: png
      recursive: False
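# Validation runs on a fixed triplet of folders (masked inputs, ground truth, masks) under
# testdata/, rather than on randomly generated masks, so metrics stay comparable across runs.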
train:
  # learning rate
  lr: 5e-5                      # learning rate
  lr_min: 2e-5
  lr_schedule: cosin
  warmup_iterations: 5000
  # dataloader
  batch: [96, 2]
  microbatch: 12
  num_workers: 6
  prefetch_factor: 2
  # optimization settings
  weight_decay: 0
  ema_rate: 0.999
  iterations: 400000            # total iterations
  # save logging
  save_freq: 10000
  log_freq: [200, 2000, 1]      # [training loss, training images, val images]
  loss_coef: [1.0, 10.0]        # [mse, lpips]
  local_logging: True           # manually save images
  tf_logging: False             # tensorboard logging
  # validation settings
  use_ema_val: True
  val_freq: ${train.save_freq}
  val_y_channel: True
  val_resolution: 256
  val_padding_mode: reflect
  # training setting
  use_amp: True                 # amp training
  seed: 123456                  # random seed
  global_seeding: False
  # model compile
  compile:
    flag: False
    mode: reduce-overhead
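# A hedged reading of the less obvious settings (conventions assumed, not guaranteed by
# this file alone):
#   batch: [96, 2]       -> batch size for training / validation
#   microbatch: 12       -> each batch is split into chunks of 12, i.e. gradient accumulation
#   ema_rate: 0.999      -> an EMA copy of the weights is kept and used for validation
#                           (use_ema_val: True)
#   val_y_channel: True  -> image metrics are computed on the Y (luma) channel
#   compile.flag: False  -> torch.compile is disabled; 'reduce-overhead' is a standard
#                           torch.compile mode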