|
model_dir: model/pretrain/lab/multilingual/l2r/multi_bpe32k/parallel_mono_contrastive_1/transformer_big_t2t_12e12d |
|
data_1: data/multilingual/bin/merged_deduped_ras |
|
data_mono_1: data/multilingual/bin/mono_only/splitaa |
|
data_mono_2: data/multilingual/bin/mono_only/splitab |
|
data_mono_3: data/multilingual/bin/mono_only/splitac |
|
data_mono_4: data/multilingual/bin/mono_only/splitad |
|
data_mono_5: data/multilingual/bin/mono_only/splitae |
|
data_mono_6: data/multilingual/bin/mono_only/mono_de_fr_en |
|
data_mono_7: data/multilingual/bin/mono_only/mono_nl_pl_pt |
|
source_lang: src |
|
target_lang: trg |
|
task: translation_w_mono |
|
parallel_ratio: 0.2 |
|
mono_ratio: 0.07 |
|
arch: transformer_big_t2t_12e12d |
|
share_all_embeddings: true |
|
encoder_learned_pos: true |
|
decoder_learned_pos: true |
|
max_source_positions: 1024 |
|
max_target_positions: 1024 |
|
dropout: 0.1 |
|
criterion: label_smoothed_cross_entropy_with_contrastive |
|
contrastive_lambda: 1.0 |
|
temperature: 0.1 |
|
lr: 0.0003 |
|
clip_norm: 10.0 |
|
optimizer: adam |
|
adam_eps: 1e-06 |
|
weight_decay: 0.01 |
|
warmup_updates: 10000 |
|
label_smoothing: 0.1 |
|
lr_scheduler: polynomial_decay |
|
min_lr: -1 |
|
max_tokens: 1536 |
|
update_freq: 30 |
|
max_update: 5000000 |
|
no_scale_embedding: true |
|
layernorm_embedding: true |
|
save_interval_updates: 2000 |
|
skip_invalid_size_inputs_valid_test: true |
|
log_interval: 500 |
|
num_workers: 1 |
|
fp16: true |
|
seed: 33122 |
|
|