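# Train a Transformer language model on WikiText-103 with OpenNMT-py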
seed: 42
share_vocab: true
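
# Where the samples and the generated vocab(s) will be written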
save_data: data/wikitext-103-raw/run/example
src_vocab: data/wikitext-103-raw/run/example.vocab.src
src_vocab_size: 60000
tgt_vocab_size: 60000
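
# Tokenization: BPE subwords applied through the onmt_tokenize transform;
# filtertoolong drops examples longer than the sequence limits below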
src_subword_type: bpe
src_subword_model: data/wikitext-103-raw/subwords.bpe
src_onmttok_kwargs: '{"mode": "aggressive", "joiner_annotate": True, "preserve_placeholders": True, "case_markup": True, "soft_case_regions": True, "preserve_segmented_tokens": True}'
transforms: [onmt_tokenize, filtertoolong]
src_seq_length: 512
tgt_seq_length: 512
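
# Allow overwriting existing artifacts under save_data from a previous run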
overwrite: True
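
# Corpus opts: one training corpus plus a validation set; a language model
# takes only path_src (there is no separate target side)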
data:
    corpus_1:
        path_src: data/wikitext-103-raw/wiki.train.raw
    valid:
        path_src: data/wikitext-103-raw/wiki.valid.raw
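
# Train on a single GPU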
world_size: 1
gpu_ranks: [0]
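
# Checkpointing, validation, and logging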
save_model: data/wikitext-103-raw/run/model-lm
save_checkpoint_steps: 50000
train_steps: 1000000
valid_steps: 500
report_every: 100
tensorboard: true
tensorboard_log_dir: data/wikitext-103-raw/run/tensorboard
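
# Model: a 6-layer decoder-only Transformer LM (hidden size 512, 8 heads,
# feed-forward size 2048), with batches counted in tokens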
model_task: lm
encoder_type: transformer_lm
decoder_type: transformer_lm
position_encoding: true
dec_layers: 6
heads: 8
rnn_size: 512
word_vec_size: 512
transformer_ff: 2048
dropout_steps: [0]
dropout: [0.1]
attention_dropout: [0.1]
batch_size: 2048
batch_type: tokens
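
# Optimization: Adam with the Noam schedule (8000 warmup steps, then
# inverse-square-root decay); learning_rate acts as the Noam scale factor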
model_dtype: "fp32"
optim: "adam"
learning_rate: 2
warmup_steps: 8000
decay_method: "noam"
adam_beta2: 0.998
max_grad_norm: 0
label_smoothing: 0.1
param_init: 0
param_init_glorot: true
normalization: "tokens"
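
# Usage sketch (assumes this file is saved as wiki_103.yaml; onmt_build_vocab
# and onmt_train are OpenNMT-py's standard entry points):
#   onmt_build_vocab -config wiki_103.yaml -n_sample -1
#   onmt_train -config wiki_103.yaml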