name: "kin_en_transformer" data: src: lang: "kin" level: "bpe" lowercase: False tokenizer_type: "subword-nmt" num_merges: 4000 tokenizer_cfg: num_merges: 4000 codes: "models/kin_en_transformer/bpe.codes.4000" pretokenizer: "none" trg: lang: "en" level: "bpe" lowercase: False tokenizer_type: "subword-nmt" num_merges: 4000 tokenizer_cfg: num_merges: 4000 codes: "models/kin_en_transformer/bpe.codes.4000" pretokenizer: "none" train: "data/train/kin_en_train" dev: "data/val/kin_en_val" test: "data/test/kin_en_test" level: "bpe" # lowercase: False max_sent_length: 100 # src_vocab: "models/kin_en_tranformer/src_vocab" # trg_vocab: "models/kin_en_tranformer/src_vocab" dataset_type: "tsv" testing: beam_size: 15 beam_alpha: 1.0 eval_metrics: ["bleu"] batch_type: sentence sacrebleu_cfg: # sacrebleu options remove_whitespace: True # `remove_whitespace` option in sacrebleu.corpus_chrf() function (defalut: True) tokenize: "none" # `tokenize` option in sacrebleu.corpus_bleu() function (options include: "none" (use for already tokenized test data), "13a" (default minimal tokenizer), "intl" which mostly does punctuation and unicode, etc) training: #load_model: "{ models/{name}_transformer/1.ckpt" # if uncommented, load a pre-trained model from this checkpoint random_seed: 42 optimizer: "adam" normalization: "tokens" adam_betas: [0.9, 0.999] scheduling: "plateau" # TODO: try switching from plateau to Noam scheduling patience: 5 # For plateau: decrease learning rate by decrease_factor if validation score has not improved for this many validation rounds. learning_rate_factor: 0.5 # factor for Noam scheduler (used with Transformer) learning_rate_warmup: 1000 # warmup steps for Noam scheduler (used with Transformer) decrease_factor: 0.7 loss: "crossentropy" learning_rate: 0.0003 learning_rate_min: 0.00000001 weight_decay: 0.0 label_smoothing: 0.1 batch_size: 256 batch_type: "token" eval_batch_size: 3600 eval_batch_type: "token" batch_multiplier: 1 early_stopping_metric: "ppl" epochs: 30 # TODO: Decrease for when playing around and checking of working. Around 30 is sufficient to check if its working at all validation_freq: 1000 # TODO: Set to at least once per epoch. logging_freq: 100 eval_metric: "bleu" model_dir: "models/kin_en_transformer" overwrite: False # TODO: Set to True if you want to overwrite possibly existing models. shuffle: True use_cuda: True max_output_length: 100 print_valid_sents: [0, 1, 2, 3] keep_last_ckpts: 3 model: initializer: "xavier_normal" bias_initializer: "zeros" init_gain: 1.0 embed_initializer: "xavier_normal" embed_init_gain: 1.0 tied_embeddings: False tied_softmax: True encoder: type: "transformer" num_layers: 6 num_heads: 8 embeddings: embedding_dim: 256 scale: True dropout: 0. # typically ff_size = 4 x hidden_size hidden_size: 256 ff_size: 1024 dropout: 0.1 layer_norm: "post" decoder: type: "transformer" num_layers: 6 num_heads: 8 embeddings: embedding_dim: 256 scale: True dropout: 0. # typically ff_size = 4 x hidden_size hidden_size: 256 ff_size: 1024 dropout: 0.1 layer_norm: "post"