---
# Training configuration: trainer class, VQ autoencoder, Swin-UNet model,
# diffusion process, train/val datasets and optimisation settings.
# The `${...}` values in the `train` section are interpolations, so the file
# must be read by a loader that resolves them (presumably OmegaConf - confirm).

trainer:
  target: trainer.TrainerDifIRLPIPS

autoencoder:
  target: ldm.models.autoencoder.VQModelTorch
  ckpt_path: weights/autoencoder/autoencoder_vq_f4.pth
  use_fp16: true
  params:
    embed_dim: 3
    n_embed: 8192
    ddconfig:
      double_z: false
      z_channels: 3
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
        - 1
        - 2
        - 4
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
      padding_mode: zeros

model:
  target: models.unet.UNetModelSwin
  ckpt_path: null  # no checkpoint path configured for the model
  params:
    image_size: 64
    in_channels: 3
    model_channels: 160
    out_channels: 3
    attention_resolutions: [64, 32, 16, 8]
    dropout: 0
    channel_mult: [1, 2, 2, 4]
    num_res_blocks: [2, 2, 2, 2]
    conv_resample: true
    dims: 2
    use_fp16: false
    num_head_channels: 32
    use_scale_shift_norm: true
    resblock_updown: false
    swin_depth: 2
    swin_embed_dim: 192
    window_size: 8
    mlp_ratio: 4
    cond_lq: true
    lq_size: 64

diffusion:
  target: models.script_util.create_gaussian_diffusion
  params:
    sf: 4
    schedule_name: exponential
    schedule_kwargs:
      power: 0.3
    etas_end: 0.99
    steps: 4
    min_noise_level: 0.2
    kappa: 2.0
    weighted_mse: false
    predict_type: xstart
    timestep_respacing: null
    scale_factor: 1.0
    normalize_input: true
    latent_flag: true

data:
  train:
    type: bicubic
    params:
      source_path: null
      # Machine-specific absolute paths; adjust to the local dataset layout.
      source_txt_path:
        - /mnt/sfs-common/zsyue/database/ImageNet/files_txt/path_train_all.txt
        - /mnt/sfs-common/zsyue/database/FFHQ/files_txt/files256.txt
      # NOTE(review): pch_size and the pass_* switches are assumed to belong
      # to degrade_kwargs - confirm against the dataset class signature.
      degrade_kwargs:
        scale: 0.25
        activate_matlab: true
        resize_back: false
        pch_size: 256
        pass_smallmaxresize: false
        pass_aug: false
        pass_crop: false
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      length: null
      need_path: false
      im_exts: JPEG
      recursive: false
  val:
    type: bicubic
    params:
      # Machine-specific absolute path; adjust to the local dataset layout.
      source_path: /mnt/sfs-common/zsyue/projects/ResShift/SR/testingdata/imagenet256/gt
      # NOTE(review): pch_size and the pass_* switches are assumed to belong
      # to degrade_kwargs - confirm against the dataset class signature.
      degrade_kwargs:
        scale: 0.25
        activate_matlab: true
        resize_back: false
        pch_size: 256
        pass_smallmaxresize: true
        pass_aug: true
        pass_crop: true
      transform_type: default
      transform_kwargs:
        mean: 0.5
        std: 0.5
      length: 64
      need_path: false
      im_exts: png
      recursive: false

train:
  # learning rate
  # Written with an explicit decimal point: a bare `5e-5` is resolved as a
  # string (not a float) by YAML 1.1 loaders such as plain PyYAML.
  lr: 5.0e-5  # learning rate
  lr_min: 2.0e-5  # lower learning-rate value (presumably the schedule floor - confirm)
  lr_schedule: cosin
  warmup_iterations: 5000
  # dataloader
  batch: [96, 8]
  microbatch: 12
  num_workers: 6
  prefetch_factor: 2
  # optimization settings
  weight_decay: 0
  ema_rate: 0.999
  iterations: 400000  # total iterations
  # save logging
  save_freq: 10000
  log_freq: [1000, 2000, 1]  # [training loss, training images, val images]
  loss_coef: [1.0, 1.0]  # [mse, lpips]
  local_logging: true  # manually save images
  tf_logging: false  # tensorboard logging
  # validation settings
  use_ema_val: true
  val_freq: ${train.save_freq}  # interpolated: same value as train.save_freq
  val_y_channel: true
  val_resolution: ${model.params.lq_size}  # interpolated: same value as model.params.lq_size
  val_padding_mode: reflect
  # training setting
  use_amp: true  # amp training
  seed: 123456  # random seed
  global_seeding: false
  # model compile
  compile:
    flag: true
    mode: reduce-overhead  # options: default, reduce-overhead