Spaces:
Runtime error
Runtime error
# data settings | |
data: | |
data_dir: [] | |
caption_proportion: | |
prompt: 1 | |
external_caption_suffixes: [] | |
external_clipscore_suffixes: [] | |
clip_thr_temperature: 1.0 | |
clip_thr: 0.0 | |
sort_dataset: false | |
load_text_feat: false | |
load_vae_feat: false | |
transform: default_train | |
type: SanaWebDatasetMS | |
image_size: 512 | |
hq_only: false | |
valid_num: 0 | |
# model settings | |
model: | |
model: SanaMS_600M_P1_D28 | |
image_size: 512 | |
mixed_precision: fp16 # one of: 'fp16', 'fp32', 'bf16' | |
fp32_attention: true | |
load_from: | |
resume_from: | |
checkpoint: | |
load_ema: false | |
resume_lr_scheduler: true | |
resume_optimizer: true | |
aspect_ratio_type: ASPECT_RATIO_1024 | |
multi_scale: true | |
pe_interpolation: 1.0 | |
micro_condition: false | |
attn_type: linear # one of: 'flash', 'linear', 'vanilla', 'triton_linear' | |
cross_norm: false | |
autocast_linear_attn: false | |
ffn_type: glumbconv | |
mlp_acts: | |
- silu | |
- silu | |
- | |
mlp_ratio: 2.5 | |
use_pe: false | |
qk_norm: false | |
class_dropout_prob: 0.0 | |
linear_head_dim: 32 | |
# CFG (classifier-free guidance) & PAG (perturbed-attention guidance) settings | |
cfg_scale: 4 | |
guidance_type: classifier-free | |
pag_applied_layers: [14] | |
# text encoder settings | |
text_encoder: | |
text_encoder_name: gemma-2-2b-it | |
caption_channels: 2304 | |
y_norm: false | |
y_norm_scale_factor: 1.0 | |
model_max_length: 300 | |
chi_prompt: [] | |
# VAE settings | |
vae: | |
vae_type: dc-ae | |
vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0 | |
scale_factor: 0.41407 | |
vae_latent_dim: 32 | |
vae_downsample_rate: 32 | |
sample_posterior: true | |
# Scheduler settings | |
scheduler: | |
train_sampling_steps: 1000 | |
predict_v: True | |
noise_schedule: linear_flow | |
pred_sigma: false | |
flow_shift: 1.0 | |
weighting_scheme: logit_normal | |
logit_mean: 0.0 | |
logit_std: 1.0 | |
vis_sampler: flow_dpm-solver | |
# training settings | |
train: | |
num_workers: 4 | |
seed: 43 | |
train_batch_size: 32 | |
num_epochs: 100 | |
gradient_accumulation_steps: 1 | |
grad_checkpointing: false | |
gradient_clip: 1.0 | |
gc_step: 1 | |
# optimizer settings | |
optimizer: | |
eps: 1.0e-10 | |
lr: 0.0001 | |
type: AdamW | |
weight_decay: 0.03 | |
lr_schedule: constant | |
lr_schedule_args: | |
num_warmup_steps: 500 | |
auto_lr: | |
rule: sqrt | |
ema_rate: 0.9999 | |
eval_batch_size: 16 | |
use_fsdp: false | |
use_flash_attn: false | |
eval_sampling_steps: 250 | |
lora_rank: 4 | |
log_interval: 50 | |
mask_type: 'null' | |
mask_loss_coef: 0.0 | |
load_mask_index: false | |
snr_loss: false | |
real_prompt_ratio: 1.0 | |
debug_nan: false | |
# checkpoint settings | |
save_image_epochs: 1 | |
save_model_epochs: 1 | |
save_model_steps: 1000000 | |
# visualization settings | |
visualize: false | |
null_embed_root: output/pretrained_models/ | |
valid_prompt_embed_root: output/tmp_embed/ | |
validation_prompts: | |
- dog | |
- portrait photo of a girl, photograph, highly detailed face, depth of field | |
- Self-portrait oil painting, a beautiful cyborg with golden hair, 8k | |
- Astronaut in a jungle, cold color palette, muted colors, detailed, 8k | |
- A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece | |
local_save_vis: false | |
deterministic_validation: true | |
online_metric: false | |
eval_metric_step: 5000 | |
online_metric_dir: metric_helper | |
# work dir settings | |
work_dir: /cache/exps/ | |
skip_step: 0 | |
# LCM (latent consistency model) settings | |
loss_type: huber | |
huber_c: 0.001 | |
num_ddim_timesteps: 50 | |
w_max: 15.0 | |
w_min: 3.0 | |
ema_decay: 0.95 | |