# File size: 3,239 Bytes
# bae832d (presumably the short commit hash of this revision -- confirm)
# Training configuration for the run/vep_ru-1.0 translation model:
# transformer encoder/decoder with a shared SentencePiece model and vocabulary.
# NOTE(review): the key names and the run/.../opennmt paths suggest this file
# is consumed by an OpenNMT-py training run -- confirm before relying on it.
# Keys are kept in alphabetical order at every level.
accum_count: 100
accum_steps: 0
adam_beta2: 0.998
attention_dropout: 0.1
batch_size: 1500
batch_type: tokens
bucket_size: 49304

# Corpora. Each training entry is keyed by an opaque 32-hex-digit id and
# carries its own source/target paths, prefixes and weight; `valid` is the
# held-out set. All entries share one transform pipeline through the
# `shared_transforms` anchor defined on the first entry.
# NOTE(review): `weight` is presumably the relative sampling weight and
# `src_prefix` the tag inserted by the `prefix` transform -- confirm.
data:
  27e6308e0faf12cd1b4bed73708fad6f:
    path_src: dataset/vep-ru/et-synt/source.txt
    path_tgt: dataset/vep-ru/et-synt/target.txt
    src_prefix: et_s_Latn
    tgt_prefix: ''
    transforms: &shared_transforms
      - sentencepiece
      - filtertoolong
      - prefix
    weight: 37
  2caaf1b1e5e15d4a06b5b77c438ba0bc:
    path_src: dataset/vep-ru/fin-original/source.txt
    path_tgt: dataset/vep-ru/fin-original/target.txt
    src_prefix: fi_Latn
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 37
  3def7622ce4622202126c8828c6fae3f:
    path_src: dataset/vep-ru/fin-synt/source.txt
    path_tgt: dataset/vep-ru/fin-synt/target.txt
    src_prefix: fi_s_Latn
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 37
  517216e92cd45eb876c79dc3bafaad18:
    path_src: dataset/vep-ru/var/source.txt
    path_tgt: dataset/vep-ru/var/target.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 167
  815e8e066d03025e82529ef5c5ad232c:
    path_src: dataset/vep-ru/vep/source.txt
    path_tgt: dataset/vep-ru/vep/target.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 6
  8e6d221db9fc70bb3fd1104c24c8f25c:
    path_src: dataset/vep-ru/vep-dic/source.txt
    path_tgt: dataset/vep-ru/vep-dic/target.txt
    src_prefix: ''
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 1
  f080464d79eb04a44e9947db1b54c17b:
    path_src: dataset/vep-ru/et-original/source.txt
    path_tgt: dataset/vep-ru/et-original/target.txt
    src_prefix: et_Latn
    tgt_prefix: ''
    transforms: *shared_transforms
    weight: 37
  valid:
    path_src: run/vep_ru-1.0/src-val.txt
    path_tgt: run/vep_ru-1.0/tgt-val.txt
    transforms: *shared_transforms

dec_layers: 20
decay_method: rsqrt
decoder_type: transformer
dropout: 0.1
dropout_steps: 0
early_stopping: 0
enc_layers: 20
encoder_type: transformer
# Two ranks listed here; world_size at the end of the file is also 2.
gpu_ranks:
  - 0
  - 1
heads: 8
hidden_size: 512
keep_checkpoint: 15
label_smoothing: 0.1
learning_rate: 0.005
max_generator_batches: 2
max_grad_norm: 0
max_relative_positions: 20
model_dtype: fp16
normalization: tokens
num_workers: 6
optim: pagedadamw8bit
param_init: 0
param_init_glorot: true
pos_ffn_activation_fn: gated-gelu
position_encoding: false
queue_size: 10000
reset_optim: none
rnn_size: 512
save_checkpoint_steps: 500
save_data: run/vep_ru-1.0/opennmt
save_model: run/vep_ru-1.0/opennmt/openmt.model
share_decoder_embeddings: true
share_embeddings: true
share_vocab: true
skip_empty_level: silent
# Source-side tokenizer keyword arguments, kept as a nested mapping so that
# `lang`/`mode` do not collide with the target-side pair below.
src_onmttok_kwargs:
  lang: vep
  mode: none
src_seq_length: 185
src_subword_alpha: 0.0
src_subword_model: run/vep_ru-1.0/sentencepiece.model
src_subword_nbest: 1
src_subword_type: sentencepiece
src_vocab: run/vep_ru-1.0/opennmt/openmt.vocab
src_vocab_size: 32000
# Target-side tokenizer keyword arguments (same shape as the source side).
tgt_onmttok_kwargs:
  lang: ru
  mode: none
tgt_seq_length: 185
tgt_subword_alpha: 0.0
tgt_subword_model: run/vep_ru-1.0/sentencepiece.model
tgt_subword_nbest: 1
tgt_subword_type: sentencepiece
tgt_vocab: run/vep_ru-1.0/opennmt/openmt.vocab
tgt_vocab_size: 32000
train_steps: 2000
transformer_ff: 6144
# A real boolean, not the quoted string 'False': a non-empty string is truthy
# to any consumer that loads this file with a plain YAML parser.
update_vocab: false
valid_batch_size: 64
valid_metrics:
  - BLEU
valid_steps: 500
warmup_steps: 500
word_vec_size: 512
world_size: 2
|