moiduy04 committed on
Commit c972b22 · 1 Parent(s): e078940

Upload full config files to support future training.

config/config_big.yaml ADDED
@@ -0,0 +1,34 @@
+ experiment_name: 'runs/transformer_big'
+
+ dataset:
+   src_lang: 'lo'
+   src_tokenizer: 'BPE'
+   src_max_seq_len: 400
+   tgt_lang: 'vi'
+   tgt_tokenizer: 'WordLevel'
+   tgt_max_seq_len: 350
+   train_dataset: 'train_clean.dat'
+   validate_dataset: 'dev_clean.dat'
+   tokenizer_file: "tokenizer_{0}.json"
+   bleu_dataset: 'test2023'
+
+ model: # 42688527 parameters
+   d_model: 512
+   num_heads: 8
+   d_ff: 2048
+   dropout_p: 0.3
+   num_encoder_layers: 4
+   num_decoder_layers: 2
+   model_folder: "weights"
+   model_basename: "transformer_"
+   preload: "big"
+   bleu_dataset: 'test2023'
+
+ train:
+   lr: 0.0001 # 1e-4
+   batch_size: 16
+   num_epochs: 40
+   label_smoothing: 0.1
+   on_colab: True # are you training on Colab?
+   patience: 100 # (steps)
+   warm_up_steps: 700
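
All three configs added in this commit share the same schema (dataset / model / train). As a non-authoritative sketch, a config in this shape could be loaded with PyYAML roughly as follows; the `get_config` helper and the default path are illustrative assumptions, not code from this repository.

```python
# Illustrative sketch only: how a config shaped like config/config_big.yaml
# could be loaded and consumed. get_config and the default path are
# assumptions, not part of this commit.
import yaml


def get_config(path: str = "config/config_big.yaml") -> dict:
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


if __name__ == "__main__":
    cfg = get_config()
    ds, model, train = cfg["dataset"], cfg["model"], cfg["train"]

    # tokenizer_file is a format template keyed by language code,
    # e.g. "tokenizer_lo.json" / "tokenizer_vi.json".
    src_tok = ds["tokenizer_file"].format(ds["src_lang"])
    tgt_tok = ds["tokenizer_file"].format(ds["tgt_lang"])

    print(cfg["experiment_name"])
    print(src_tok, tgt_tok)
    print(model["d_model"], model["num_heads"],
          model["num_encoder_layers"], model["num_decoder_layers"])
    print(train["lr"], train["batch_size"], train["num_epochs"])
```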
config/config_huge.yaml ADDED
@@ -0,0 +1,34 @@
+ experiment_name: 'runs/transformer_huge'
+
+ dataset:
+   src_lang: 'lo'
+   src_tokenizer: 'BPE'
+   src_max_seq_len: 400
+   tgt_lang: 'vi'
+   tgt_tokenizer: 'WordLevel'
+   tgt_max_seq_len: 350
+   train_dataset: 'train_clean.dat'
+   validate_dataset: 'dev_clean.dat'
+   tokenizer_file: "tokenizer_{0}.json"
+   bleu_dataset: 'test2023'
+
+ model: # 61604879 parameters
+   d_model: 512
+   num_heads: 8
+   d_ff: 2048
+   dropout_p: 0.15
+   num_encoder_layers: 8
+   num_decoder_layers: 4
+   model_folder: "weights"
+   model_basename: "transformer_"
+   preload: "final"
+   bleu_dataset: 'test2023'
+
+ train:
+   lr: 0.001 # 1e-3
+   batch_size: 32
+   num_epochs: 50
+   label_smoothing: 0.1
+   on_colab: False # are you training on Colab?
+   patience: 1
+   warm_up_steps: 200
config/config_small.yaml ADDED
@@ -0,0 +1,34 @@
+ experiment_name: 'runs/transformer_small'
+
+ dataset:
+   src_lang: 'lo'
+   src_tokenizer: 'BPE'
+   src_max_seq_len: 400
+   tgt_lang: 'vi'
+   tgt_tokenizer: 'WordLevel'
+   tgt_max_seq_len: 350
+   train_dataset: 'train_clean.dat'
+   validate_dataset: 'dev_clean.dat'
+   tokenizer_file: "tokenizer_{0}.json"
+   bleu_dataset: 'test2023'
+
+ model: # 16629775 parameters
+   d_model: 256
+   num_heads: 8
+   d_ff: 1024
+   dropout_p: 0.3
+   num_encoder_layers: 4
+   num_decoder_layers: 2
+   model_folder: "weights"
+   model_basename: "transformer_"
+   preload: "small"
+   bleu_dataset: 'test2023'
+
+ train:
+   lr: 0.0001 # 1e-4
+   batch_size: 16
+   num_epochs: 40
+   label_smoothing: 0.1
+   on_colab: True # are you training on Colab?
+   patience: 100 # (steps)
+   warm_up_steps: 700
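
Each `train` block pairs a fixed base `lr` with `warm_up_steps`. One plausible reading, assuming a PyTorch training loop, is a linear warm-up from near zero to the configured learning rate over that many optimizer steps; the schedule actually used by the training code is not part of this commit, so the sketch below is only an assumption.

```python
# Assumption-only sketch: a linear warm-up over warm_up_steps implemented
# with torch's LambdaLR. The repository's real schedule is not shown in
# this commit; the optimizer choice and its betas/eps are also assumptions.
import torch


def make_optimizer_and_scheduler(params, lr: float, warm_up_steps: int):
    optimizer = torch.optim.Adam(params, lr=lr, betas=(0.9, 0.98), eps=1e-9)
    # Multiplicative factor rises linearly from ~0 to 1, then holds at 1,
    # so the effective rate ramps up to the configured lr and stays there.
    warmup = lambda step: min(1.0, (step + 1) / max(1, warm_up_steps))
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup)
    return optimizer, scheduler


# With the values from config_big.yaml / config_small.yaml:
#   optimizer, scheduler = make_optimizer_and_scheduler(model.parameters(), 1e-4, 700)
# then call scheduler.step() after each optimizer.step().
```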