NilanE committed
Commit f8da35e · verified · 1 Parent(s): 85cf80a

Upload config.yaml with huggingface_hub

Files changed (1):
  1. config.yaml +98 -0
config.yaml ADDED
@@ -0,0 +1,98 @@
+logging:
+  project: titok_video
+  run_name: BaseAll-CB16k-TL128-256x33-init-BS64-clipGrad1.0
+
+  logging_interval: 50
+
+  save_path: out_tiny
+  save_step_interval: 5000
+  keep_prior_checkpoints: -1 # -1 to keep all
+
+  resume_from_checkpoint:
+  init_from_checkpoint: base-interp-256x33-TL128.ckpt
+
+model:
+  titok:
+    temporal_patch_size: 2
+    spatial_patch_size: 4
+
+    fsq_levels: [8, 8, 8, 6, 5] # [7, 5, 5, 5, 5]
+    num_latent_tokens: 128
+
+    encoder_size: base
+    decoder_size: base
+    exp_residual: False
+
+  vae:
+    type: wfvae # cogvideox, vidtok, wfvae
+    path: preprocess_dataset/wf-16
+    latent_channels: 16
+    temporal_compression: 4
+    spatial_compression: 8
+
+  disc: # experimental
+    use_disc: False
+
+    model_layers: 1
+    model_heads: 1
+    model_dim: 128
+
+    temporal_patch_size: 4
+    spatial_patch_size: 4
+
+    disc_start: 45000
+    disc_factor: 1.0
+    disc_weight: 0.1
+    lecam_weight: 0.0 # disabled
+
+    base_gamma: 1 # higher gamma applies more smoothing earlier in training
+    final_gamma: 0.1
+
+dataset:
+  train_dataset: "/workspace/out_enc_256_33/**/*.pt"
+  eval_dataset: "/workspace/out_enc_256_33_eval/*.pt"
+  resolution: 256
+  num_frames: 33
+  frames_per_second: 8
+  workers: 8
+
+optimizer:
+  titok:
+    learning_rate: 1e-4
+    beta1: 0.9
+    beta2: 0.99
+    weight_decay: 1e-4
+    warmup_steps: 5000 # 10000
+    end_lr: 1e-5
+
+  disc: # not used
+    learning_rate: 1e-4
+    beta1: 0.9
+    beta2: 0.99
+    weight_decay: 1e-4
+    warmup_steps: 1000
+    end_lr: 1e-5
+
+training:
+  torch_compile: True
+  seed: 42
+  max_grad_norm: 1.0 # not needed?
+
+  batch_size: 64
+  # strategy: # ddp
+
+  enable_tf32: True
+  precision: bf16-mixed
+  train_devices: 1
+  accelerator: 'gpu'
+
+  max_steps: 500000
+  val_step_interval: 2000
+
+  eval_recon_log_num: 4
+  eval_sample_size: 32
+  eval_batch_size: 1
+  eval_clear_cache: True
+  eval_shuffle: True
+
+  log_codebook: True
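
For reference, a minimal Python sketch of consuming this file with PyYAML and sanity-checking a few derived quantities. The file name and key nesting follow the diff above; the script itself, including the explicit float cast, is illustrative and not part of this repo:

    import math

    import yaml  # pip install pyyaml

    with open("config.yaml") as f:
        cfg = yaml.safe_load(f)

    titok = cfg["model"]["titok"]
    vae = cfg["model"]["vae"]
    data = cfg["dataset"]

    # FSQ codebook size is the product of the per-dimension levels:
    # 8 * 8 * 8 * 6 * 5 = 15360, i.e. the ~16k codebook ("CB16k") in run_name.
    codebook_size = math.prod(titok["fsq_levels"])

    # Spatial positions after VAE downsampling and patchification:
    # 256 / 8 / 4 = 8 per side, so an 8x8 grid per latent frame.
    side = (data["resolution"]
            // vae["spatial_compression"]
            // titok["spatial_patch_size"])

    # Note: PyYAML parses bare scientific notation such as 1e-4 as a *string*
    # (YAML 1.1 floats require a dot), so cast explicitly before use.
    lr = float(cfg["optimizer"]["titok"]["learning_rate"])

    print(f"codebook size: {codebook_size}")          # 15360
    print(f"latent grid:   {side}x{side} per frame")  # 8x8
    print(f"titok lr:      {lr}")                     # 0.0001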