# GENERATE TIME: Fri May 24 08:49:55 2024
# CMD:
# train_edlora.py -opt ortho_datasets/train_configs/ortho/0022_elsa_ortho.yml
# NOTE(review): the nesting below was reconstructed from key order and the
# parallel shape of sibling sections (the flattened text carried no
# indentation) - confirm against the option schema the training script reads.
name: 0022_elsa_ortho
manual_seed: 1022
mixed_precision: fp16
gradient_accumulation_steps: 1

# dataset and data loader settings
datasets:
  train:
    name: LoraDataset
    concept_list: ortho_datasets/data_configs/elsa.json
    use_caption: true
    use_mask: true
    # Transforms are listed in order; each entry names a transform type plus
    # its keyword options.
    instance_transform:
      - { type: HumanResizeCropFinalV3, size: 512, crop_p: 0.5 }
      - { type: ToTensor }
      - { type: Normalize, mean: [0.5], std: [0.5] }
      - { type: ShuffleCaption, keep_token_num: 1 }
      - { type: EnhanceText, enhance_type: human }
    # Presumably the <TOK> placeholder in captions is replaced by the two
    # concept tokens - verify against the dataset class.
    replace_mapping:
      <TOK>: <elsa1> <elsa2>
    batch_size_per_gpu: 2
    dataset_enlarge_ratio: 500

  val_vis:
    name: PromptDataset
    prompts: datasets/validation_prompts/single-concept/characters/test_girl.txt
    num_samples_per_prompt: 8
    latent_size: [4, 64, 64]
    replace_mapping:
      <TOK>: <elsa1> <elsa2>
    batch_size_per_gpu: 4

models:
  pretrained_path: nitrosocke/mo-di-diffusion
  enable_edlora: true  # true means ED-LoRA, false means vanilla LoRA
  # Per-component tuning switches and learning rates.
  # The explicit !!float tags are required: YAML 1.1 loaders (e.g. PyYAML)
  # read a bare exponent literal without a decimal point, such as 1e-3, as a
  # string rather than a float.
  finetune_cfg:
    text_embedding:
      enable_tuning: true
      lr: !!float 1e-3
    text_encoder:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: CLIPAttention
      lr: !!float 1e-5
    unet:
      enable_tuning: true
      lora_cfg:
        rank: 5
        alpha: 1.0
        where: Attention
      lr: !!float 1e-4
  new_concept_token: <elsa1>+<elsa2>
  # NOTE(review): the second initializer is `man`, while the validation prompts
  # above come from test_girl.txt - confirm this is intended for this concept.
  initializer_token: <rand-0.013>+man
  noise_offset: 0.01
  attn_reg_weight: 0.01
  reg_full_identity: false
  use_mask_loss: true
  gradient_checkpoint: false
  enable_xformers: true

# path
path:
  pretrain_network: null

# training settings
train:
  optim_g:
    type: AdamW
    lr: !!float 0.0  # no use since we define different component lr in model
    weight_decay: 0.01
    betas: [0.9, 0.999]  # align with taming

  # dropkv
  unet_kv_drop_rate: 0
  scheduler: linear
  emb_norm_threshold: !!float 5.5e-1

# validation settings
val:
  val_during_save: true
  compose_visualize: true
  # 0 means only visualize embedding (without lora weight)
  alpha_list: [0, 0.7, 1.0]
  sample:
    num_inference_steps: 50
    guidance_scale: 7.5

# logging settings
logger:
  print_freq: 10
  save_checkpoint_freq: !!float 10000