lpepino committed on
Commit
30e7d18
·
1 Parent(s): 5a9da5a

Upload config.gin with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.gin +56 -0
config.gin ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NUM_ENCODEC_TARGETS=8
2
+ NUM_TOTAL_TARGETS=8
3
+ NUM_TARGET_TOKENS=1024
4
+ MASK_AMOUNT=150
5
+ MASK_GAP_SIZE=15
6
+ MASK_PROP=0.5
7
+ MODEL_DIM=1024
8
+ NUM_ENCODER_LAYERS=20
9
+ NUM_ENCODER_HEADS=12
10
+ NUM_DECODER_LAYERS=2
11
+ NUM_DECODER_HEADS=12
12
+ MASKED_LOSS_WEIGHT=0.9
13
14
+ models.EncodecMAE:
15
+ wav_encoder = @models.encodecmae.encoders.EncodecEncoder
16
+ target_encoder = @models.encodecmae.targets.EncodecQuantizer
17
+ masker = @models.encodecmae.masking.TimeGapMask
18
+ visible_encoder = @encoder/models.transformers.TransformerEncoder
19
+ positional_encoder = @models.transformers.SinusoidalPositionalEmbeddings
20
+ decoder = @decoder/models.transformers.TransformerEncoder
21
+ head = @models.encodecmae.heads.FrameLevelClassificationHead
22
23
+ lr_scheduler=None
24
+ masked_weight=%MASKED_LOSS_WEIGHT
25
+ quantizer_weights=[0.22407463, 0.1759858 , 0.14499009, 0.12150037, 0.10315603, 0.08831368, 0.07608274, 0.06589669]
26
+ torch.optim.AdamW:
27
+ lr=%PRETRAIN_MAX_LR
28
+ betas=(0.9,0.95)
29
+ weight_decay=0.05
30
+ models.encodecmae.targets.EncodecQuantizer:
31
+ n = %NUM_ENCODEC_TARGETS
32
+ models.encodecmae.masking.TimeGapMask:
33
+ mask_amount = %MASK_AMOUNT
34
+ gap_size = %MASK_GAP_SIZE
35
+ mask_prop = %MASK_PROP
36
+ encoder/models.transformers.TransformerEncoder:
37
+ model_dim=%MODEL_DIM
38
+ num_layers=%NUM_ENCODER_LAYERS
39
+ attention_layer=@encoder/models.transformers.MultiHeadAttention
40
+ compile=True
41
+ encoder/models.transformers.MultiHeadAttention:
42
+ model_dim=%MODEL_DIM
43
+ num_heads=%NUM_ENCODER_HEADS
44
+ decoder/models.transformers.TransformerEncoder:
45
+ model_dim=%MODEL_DIM
46
+ num_layers=%NUM_DECODER_LAYERS
47
+ attention_layer=@decoder/models.transformers.MultiHeadAttention
48
+ compile=True
49
+ decoder/models.transformers.MultiHeadAttention:
50
+ model_dim=%MODEL_DIM
51
+ num_heads=%NUM_DECODER_HEADS
52
+ models.transformers.SinusoidalPositionalEmbeddings.embedding_dim = %MODEL_DIM
53
+ models.encodecmae.heads.FrameLevelClassificationHead:
54
+ model_dim=%MODEL_DIM
55
+ num_tokens=%NUM_TARGET_TOKENS
56
+ num_streams=%NUM_TOTAL_TARGETS