# GENERATE TIME: Fri May 24 09:19:37 2024
# CMD:
# train_edlora.py -opt ortho_datasets/train_configs/ortho/0023_moana_ortho.yml

name: 0023_moana_ortho
manual_seed: 1023
mixed_precision: fp16
gradient_accumulation_steps: 1

# dataset and data loader settings
datasets:
  train:
    name: LoraDataset
    concept_list: ortho_datasets/data_configs/moana.json
    use_caption: true
    use_mask: true
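    # transform pipeline: resize/crop to 512px, map pixels to [-1, 1],
    # shuffle caption segments (keeping the leading concept token),
    # then append human-oriented enhancement text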
    instance_transform:
      - { type: HumanResizeCropFinalV3, size: 512, crop_p: 0.5 }
      - { type: ToTensor }
      - { type: Normalize, mean: [ 0.5 ], std: [ 0.5 ] }
      - { type: ShuffleCaption, keep_token_num: 1 }
      - { type: EnhanceText, enhance_type: human }
    replace_mapping:
      <TOK>: <moana1> <moana2>
    batch_size_per_gpu: 2
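    # repeat the image list so a single epoch spans enough iterations (BasicSR convention)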
    dataset_enlarge_ratio: 500

  val_vis:
    name: PromptDataset
    prompts: datasets/validation_prompts/single-concept/characters/test_girl.txt
    num_samples_per_prompt: 8
    latent_size: [ 4, 64, 64 ]
    replace_mapping:
      <TOK>: <moana1> <moana2>
    batch_size_per_gpu: 4

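# model and finetuning settings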
models:
  pretrained_path: nitrosocke/mo-di-diffusion
  enable_edlora: true  # true means ED-LoRA, false means vanilla LoRA
  finetune_cfg:
    text_embedding:
      enable_tuning: true
      lr: !!float 1e-3
    text_encoder:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: CLIPAttention
      lr: !!float 1e-5
    unet:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: Attention
      lr: !!float 1e-4
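  # '+'-separated new tokens pair one-to-one with initializer_token entries:
  # <rand-0.013> denotes random initialization (scale 0.013); <moana2> starts
  # from the embedding of 'man'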
  new_concept_token: <moana1>+<moana2>
  initializer_token: <rand-0.013>+man
  noise_offset: 0.01 # offset-noise strength added to the diffusion noise during training
  attn_reg_weight: 0.01 # weight of the attention regularization loss
  reg_full_identity: false
  use_mask_loss: true
  gradient_checkpoint: false
  enable_xformers: true

# path
path:
  pretrain_network: ~

# training settings
train:
  optim_g:
    type: AdamW
    lr: !!float 0.0 # unused; per-component learning rates are set under models.finetune_cfg
    weight_decay: 0.01
    betas: [ 0.9, 0.999 ] # aligned with taming-transformers defaults

  # dropkv: drop rate applied to UNet key/value layers during tuning (0 disables)
  unet_kv_drop_rate: 0
  scheduler: linear
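  # new-token embeddings are rescaled if their norm exceeds this threshold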
  emb_norm_threshold: !!float 5.5e-1

# validation settings
val:
  val_during_save: true
  compose_visualize: true
  alpha_list: [0, 0.7, 1.0] # 0 means only visualize embedding (without lora weight)
  sample:
    num_inference_steps: 50
    guidance_scale: 7.5

# logging settings
logger:
  print_freq: 10
  save_checkpoint_freq: !!float 10000