---
# Training configuration for the `muse-multi` experiment.
#
# NOTE(review): the previous revision of this file had lost all indentation
# and carried stray `|` characters on every line, which made it invalid YAML.
# The nesting below was reconstructed from the key names; every key and value
# is unchanged. Confirm the placement of the keys flagged with NOTE(review)
# against the code that consumes this file.

wandb:
  entity: r-ap
  run_id: xvom3pxa

experiment:
  name: muse-multi
  project: muse-prod
  output_dir: output/
  max_train_examples: 28500
  max_eval_examples: 1000
  save_every: 1000
  eval_every: 700
  generate_every: 200
  log_every: 50
  log_grad_norm_every: 100000000
  resume_from_checkpoint: latest
  resume_lr_scheduler: true
  checkpoints_total_limit: 4
  logging_dir: output/logs

model:
  vq_model:
    type: vqgan
  text_encoder:
    type: clip
    pretrained: openMUSE/clip-vit-large-patch14-text-enc
  transformer:
    vocab_size: 8256
    hidden_size: 1024
    intermediate_size: 2816
    num_hidden_layers: 22
    num_attention_heads: 16
    in_channels: 768
    block_out_channels:
      - 768
    block_has_attention:
      - true
    block_num_heads: 12
    num_res_blocks: 3
    res_ffn_factor: 4
    patch_size: 1
    encoder_hidden_size: 768
    add_cross_attention: true
    project_encoder_hidden_states: true
    codebook_size: 8192
    num_vq_tokens: 256
    initializer_range: 0.02
    norm_type: rmsnorm
    layer_norm_eps: 1.0e-06
    ln_elementwise_affine: true
    use_encoder_layernorm: false
    use_bias: false
    hidden_dropout: 0.0
    attention_dropout: 0.0
    use_codebook_size_for_output: true
    use_empty_embeds_for_uncond: true
    add_cond_embeds: true
    cond_embed_dim: 768
    add_micro_cond_embeds: true
    micro_cond_encode_dim: 256
    micro_cond_embed_dim: 1280
    force_down_up_sample: true
  # NOTE(review): `architecture` and the xformers switch are assumed to sit
  # directly under `model` rather than under `model.transformer` - confirm.
  architecture: uvit
  enable_xformers_memory_efficient_attention: true

dataset:
  preprocessing:
    max_seq_length: 77
    resolution: 256

optimizer:
  name: adamw
  params:
    learning_rate: 0.0001
    scale_lr: false
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.01
    epsilon: 1.0e-08

lr_scheduler:
  scheduler: constant_with_warmup
  params:
    # Interpolation reference to the optimizer's learning rate; presumably
    # resolved by the config loader (OmegaConf-style syntax) - confirm.
    learning_rate: ${optimizer.params.learning_rate}
    warmup_steps: 100

training:
  gradient_accumulation_steps: 1
  batch_size: 20
  # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1
  # parsers.
  mixed_precision: 'no'
  enable_tf32: true
  use_ema: true
  ema_decay: 0.9999
  ema_update_after_step: 0
  ema_update_every: 1
  seed: 13399
  max_train_steps: 20000
  overfit_one_batch: false
  cond_dropout_prob: 0.1
  min_masking_rate: 0.0
  label_smoothing: 0.1
  max_grad_norm: null
  guidance_scale: 8
  generation_timesteps: 16
  use_soft_code_target: false
  use_stochastic_code: false
  soft_code_temp: 1.0
  mask_schedule: cosine
  mask_contiguous_region_prob: 0.15

# NOTE(review): assumed to be a top-level key rather than part of
# `training` - confirm.
config: configs/segmentation.yaml