name: "dyu_fr_transformer-sp"
joeynmt_version: "2.3.0"
model_dir: "saved_model/dyu_fr"
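# Checkpoints and training logs are written to model_dir; the overwrite: True
# flag in the training section lets repeated runs reuse this directory.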
use_cuda: False # False for CPU training
fp16: False

data:
    train: "data/dyu_fr"
    dev: "data/dyu_fr"
    # test: "data/dyu_fr"
    dataset_type: "huggingface"
    dataset_cfg:
        name: "dyu-fr"
    sample_dev_subset: 1460
    src:
        lang: "dyu"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "./models/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "./models/sp.model"
    trg:
        lang: "fr"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "./models/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "./models/sp.model"
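    # src and trg intentionally share one vocabulary file and one SentencePiece
    # model; a joint vocabulary is what makes the tied_embeddings option in the
    # model section below possible.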
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3
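    # These ids have to agree with the order of the special tokens in
    # vocab.txt. If the SentencePiece model is (re)trained, passing matching
    # ids keeps everything aligned, e.g. (corpus path is illustrative):
    #   spm_train --input=train.dyu-fr --model_prefix=sp --vocab_size=4000 \
    #             --unk_id=0 --pad_id=1 --bos_id=2 --eos_id=3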

testing:
    load_model: "models/best.ckpt"
    n_best: 1
    beam_size: 10
    beam_alpha: 1.2
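    # beam_alpha > 0 turns on length normalization of beam scores, so longer
    # hypotheses are not unduly penalized; beam_size 10 favors search quality
    # over decoding speed.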
    batch_size: 256
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    # return_prob: "hyp"
    # return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"

training:
    # load_model: "joeynmt-models-v11.0/30600.ckpt"
    # reset_best_ckpt: False
    # reset_scheduler: False
    # reset_optimizer: False
    # reset_iter_state: False
    random_seed: 42
    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.98]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 8000
    learning_rate: 0.0003
    learning_rate_min: 0.00000001
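    # "warmupinversesquareroot" grows the learning rate linearly to
    # learning_rate over the first learning_rate_warmup updates, then decays
    # it proportionally to 1/sqrt(step); training stops once it drops below
    # learning_rate_min.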
    weight_decay: 0.0001
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 8192
    batch_type: "token"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 1800
    updates: 90000
    validation_freq: 50
    logging_freq: 10
    overwrite: True
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3
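    # batch_multiplier accumulates gradients over 4 batches before each
    # optimizer step, so one update effectively sees 8192 * 4 = 32768 tokens.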

model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"
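    # For transformer encoders/decoders, embedding_dim must equal hidden_size;
    # layer_norm: "pre" normalizes before each sublayer, which is generally
    # more stable to train than post-norm.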
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"
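
# Usage (assuming this file is saved as, e.g., configs/dyu_fr.yaml):
#   python -m joeynmt train configs/dyu_fr.yaml       # train
#   python -m joeynmt test configs/dyu_fr.yaml        # evaluate checkpoints
#   python -m joeynmt translate configs/dyu_fr.yaml   # interactive translation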