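# JoeyNMT 2.3.0 configuration: Dyula (dyu) -> French (fr) transformer with a
# shared SentencePiece (BPE) vocabulary.
# A typical invocation with the standard JoeyNMT CLI (the config path below
# is an assumption; substitute wherever this file lives):
#   python -m joeynmt train configs/dyu_fr.yaml
#   python -m joeynmt translate configs/dyu_fr.yaml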

name: "dyu_fr_transformer-sp"
joeynmt_version: "2.3.0"
model_dir: "saved_model/dyu_fr"
use_cuda: False # False for CPU training
fp16: False

data:
    train: "data/dyu_fr"
    dev: "data/dyu_fr"
    # test: "data/dyu_fr"
    dataset_type: "huggingface"
    dataset_cfg:
        name: "dyu-fr"
    sample_dev_subset: 1460
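    # src and trg share one SentencePiece model and vocabulary (a joint
    # subword model, which also enables the tied embeddings set under
    # "model" below). A sketch of how such a model could be trained with the
    # sentencepiece CLI -- the input path here is hypothetical:
    #   spm_train --input=data/dyu_fr/train.dyu-fr \
    #             --model_prefix=models/sp --model_type=bpe --vocab_size=4000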
    src:
        lang: "dyu"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file: "./models/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "./models/sp.model"
    trg:
        lang: "fr"
        max_length: 100
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 4000
        voc_min_freq: 1
        voc_file:"./models/vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "./models/sp.model"
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3


testing:
    load_model: "models/best.ckpt"
    n_best: 1
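    # Beam search: keep beam_size hypotheses per step; beam_alpha is the
    # length-penalty exponent (higher values favor longer hypotheses).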
    beam_size: 10
    beam_alpha: 1.2
    batch_size: 256
    batch_type: "token"
    max_output_length: 100
    eval_metrics: ["bleu"]
    #return_prob: "hyp"
    #return_attention: False
    sacrebleu_cfg:
        tokenize: "13a"

training:
    # load_model: "joeynmt-models-v11.0/30600.ckpt"
    #reset_best_ckpt: False
    #reset_scheduler: False
    #reset_optimizer: False
    #reset_iter_state: False
    random_seed: 42
    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.98]
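    # "warmupinversesquareroot" (sketch of the usual behavior): the LR rises
    # linearly to learning_rate over learning_rate_warmup steps, then decays
    # proportionally to 1/sqrt(step), floored at learning_rate_min.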
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 8000
    learning_rate: 0.0003
    learning_rate_min: 0.00000001
    weight_decay: 0.0001
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 8192
    batch_type: "token"
    batch_multiplier: 4
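    # Gradient accumulation: each optimizer step aggregates batch_multiplier
    # token-batches, i.e. roughly 8192 x 4 = 32,768 tokens per update.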
    early_stopping_metric: "bleu"
    epochs: 1800
    updates: 90000
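    # Training ends when either budget above (epochs or updates) is reached
    # first.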
    validation_freq: 50
    logging_freq: 10
    overwrite: True
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 3

model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
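    # Tying embeddings (and the output layer) requires a joint source/target
    # vocabulary; that holds here since src and trg use the same voc_file
    # and sp.model.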
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 4
        embeddings:
            embedding_dim: 256
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 256
        ff_size: 1024
        dropout: 0.2
        layer_norm: "pre"