name: Lint & Tests

on: [push, pull_request]

jobs:
  lint-and-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.6"]  # build only for 3.6 for now
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade setuptools
          pip install -e .
          pip install -r requirements.opt.txt
          pip install flake8
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Lint with flake8
        run: |
          flake8 .
      - name: Unit tests
        run: |
          python -m unittest discover
      - name: Test vocabulary build
        run: |
          python onmt/bin/build_vocab.py \
            -config data/data.yaml \
            -save_data /tmp/onmt \
            -n_sample 5000 \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            && rm -rf /tmp/sample
      - name: Test vocabulary build with features
        run: |
          python onmt/bin/build_vocab.py \
            -config data/features_data.yaml \
            -save_data /tmp/onmt_feat \
            -src_vocab /tmp/onmt_feat.vocab.src \
            -tgt_vocab /tmp/onmt_feat.vocab.tgt \
            -src_feats_vocab '{"feat0": "/tmp/onmt_feat.vocab.feat0"}' \
            -n_sample -1 \
            && rm -rf /tmp/sample
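      # For context: the data/*.yaml configs consumed by these steps follow
      # OpenNMT-py's dataset-config format. A minimal sketch, assuming
      # illustrative corpus names and paths (not necessarily the repo's
      # actual files):
      #
      #   data:
      #     corpus_1:
      #       path_src: data/src-train.txt
      #       path_tgt: data/tgt-train.txt
      #     valid:
      #       path_src: data/src-val.txt
      #       path_tgt: data/tgt-val.txt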
      - name: Test field/transform dump
        run: |
          # The dumped fields are used later when testing tools
          python train.py \
            -config data/data.yaml \
            -save_data /tmp/onmt.train.check \
            -dump_fields \
            -dump_transforms \
            -n_sample 30 \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000
      - name: Test RNN training
        run: |
          python train.py \
            -config data/data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -rnn_size 10 \
            -batch_size 10 \
            -word_vec_size 5 \
            -report_every 5 \
            -train_steps 10
      - name: Test RNN training with copy
        run: |
          python train.py \
            -config data/data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -rnn_size 10 \
            -batch_size 10 \
            -word_vec_size 5 \
            -report_every 5 \
            -train_steps 10 \
            -copy_attn
      - name: Test RNN training with coverage
        run: |
          python train.py \
            -config data/data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -rnn_size 10 -batch_size 10 \
            -word_vec_size 5 -report_every 5 \
            -coverage_attn true -lambda_coverage 0.1 \
            -train_steps 10
      - name: Test Transformer training with align
        run: |
          python train.py \
            -config data/align_data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -max_generator_batches 0 \
            -encoder_type transformer \
            -decoder_type transformer \
            -layers 4 \
            -word_vec_size 16 \
            -rnn_size 16 \
            -heads 2 \
            -transformer_ff 64 \
            -lambda_align 0.05 \
            -alignment_layer 2 \
            -alignment_heads 0 \
            -report_every 5 \
            -train_steps 10
      - name: Test LM training
        run: |
          python train.py \
            -config data/lm_data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.src \
            -model_task lm \
            -encoder_type transformer_lm \
            -decoder_type transformer_lm \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -dec_layers 2 -batch_size 10 \
            -heads 4 -transformer_ff 64 \
            -word_vec_size 16 -report_every 5 \
            -rnn_size 16 -train_steps 10
      - name: Test LM training with copy
        run: |
          python train.py \
            -config data/lm_data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.src \
            -model_task lm \
            -encoder_type transformer_lm \
            -decoder_type transformer_lm \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -dec_layers 2 -batch_size 10 \
            -heads 4 -transformer_ff 64 \
            -word_vec_size 16 -report_every 5 \
            -rnn_size 16 -train_steps 10 \
            -copy_attn
      - name: Test Graph neural network training
        run: |
          python train.py \
            -config data/ggnn_data.yaml \
            -src_seq_length 1000 \
            -tgt_seq_length 30 \
            -encoder_type ggnn \
            -layers 2 \
            -decoder_type rnn \
            -rnn_size 256 \
            -learning_rate 0.1 \
            -learning_rate_decay 0.8 \
            -global_attention general \
            -batch_size 32 \
            -word_vec_size 256 \
            -bridge \
            -train_steps 10 \
            -n_edge_types 9 \
            -state_dim 256 \
            -n_steps 10 \
            -n_node 64
      - name: Test training with features
        run: |
          python onmt/bin/train.py \
            -config data/features_data.yaml \
            -src_vocab /tmp/onmt_feat.vocab.src \
            -tgt_vocab /tmp/onmt_feat.vocab.tgt \
            -src_feats_vocab '{"feat0": "/tmp/onmt_feat.vocab.feat0"}' \
            -src_vocab_size 1000 -tgt_vocab_size 1000 \
            -rnn_size 10 -batch_size 10 \
            -word_vec_size 5 \
            -report_every 5 -train_steps 10 \
            -save_model /tmp/onmt.model \
            -save_checkpoint_steps 10
      - name: Test translation with features
        run: |
          python translate.py \
            -model /tmp/onmt.model_step_10.pt \
            -src data/data_features/src-test.txt \
            -src_feats "{'feat0': 'data/data_features/src-test.feat0'}" \
            -verbose
      - name: Test RNN translation
        run: |
          head data/src-test.txt > /tmp/src-test.txt
          python translate.py \
            -model onmt/tests/test_model.pt \
            -src /tmp/src-test.txt \
            -verbose
      - name: Test RNN ensemble translation
        run: |
          head data/src-test.txt > /tmp/src-test.txt
          python translate.py \
            -model onmt/tests/test_model.pt \
                   onmt/tests/test_model.pt \
            -src /tmp/src-test.txt \
            -verbose
      - name: Test RNN translation with beam search
        run: |
          python translate.py \
            -model onmt/tests/test_model2.pt \
            -src data/morph/src.valid \
            -verbose \
            -batch_size 10 \
            -beam_size 10 \
            -tgt data/morph/tgt.valid \
            -out /tmp/trans
          diff data/morph/tgt.valid /tmp/trans && rm /tmp/trans
      - name: Test RNN translation with random sampling
        run: |
          python translate.py \
            -model onmt/tests/test_model2.pt \
            -src data/morph/src.valid \
            -verbose \
            -batch_size 10 \
            -beam_size 1 \
            -seed 1 \
            -random_sampling_topk -1 \
            -random_sampling_temp 0.0001 \
            -tgt data/morph/tgt.valid \
            -out /tmp/trans
          diff data/morph/tgt.valid /tmp/trans && rm /tmp/trans
      - name: Test LM generation
        run: |
          head data/src-test.txt > /tmp/src-test.txt
          python translate.py \
            -model onmt/tests/test_model_lm.pt \
            -src /tmp/src-test.txt \
            -verbose
      - name: Test LM generation with beam search
        run: |
          python translate.py \
            -model onmt/tests/test_model_lm.pt \
            -src data/data_lm/src-gen.txt \
            -verbose -batch_size 10 \
            -beam_size 10 \
            -ban_unk_token \
            -out /tmp/gen
          diff data/data_lm/gen-beam-sol.txt /tmp/gen && rm /tmp/gen
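      # The sampling-based generation tests below diff their output against
      # checked-in solution files, so they must be reproducible: each pins
      # -seed, and the plain sampling test additionally combines
      # -random_sampling_topk -1 (sample from the full distribution) with a
      # near-zero -random_sampling_temp, which makes sampling effectively
      # greedy in this pinned environment.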
      - name: Test LM generation with random sampling
        run: |
          python translate.py \
            -model onmt/tests/test_model_lm.pt \
            -src data/data_lm/src-gen.txt \
            -verbose -batch_size 10 \
            -beam_size 1 \
            -seed 1 \
            -random_sampling_topk -1 \
            -random_sampling_temp 0.0001 \
            -ban_unk_token \
            -out /tmp/gen
          diff data/data_lm/gen-sampling-sol.txt /tmp/gen && rm /tmp/gen
      - name: Test LM generation with random top-k/nucleus sampling
        run: |
          python translate.py \
            -model onmt/tests/test_model_lm.pt \
            -src data/data_lm/src-gen.txt \
            -verbose -batch_size 10 \
            -beam_size 1 \
            -seed 3 \
            -random_sampling_topk -1 \
            -random_sampling_topp 0.95 \
            -random_sampling_temp 1 \
            -ban_unk_token \
            -out /tmp/gen
          diff data/data_lm/gen-nucleus-sampling-sol.txt /tmp/gen && rm /tmp/gen
      - name: Test LM generation with random sampling multi-beams
        run: |
          python translate.py \
            -model onmt/tests/test_model_lm.pt \
            -src data/data_lm/src-gen.txt \
            -verbose -batch_size 10 \
            -beam_size 10 \
            -seed 2 \
            -random_sampling_topk 50 \
            -random_sampling_topp 0.95 \
            -random_sampling_temp 1 \
            -length_penalty avg \
            -ban_unk_token \
            -min_length 5 \
            -out /tmp/gen
          diff data/data_lm/gen-sampling-beams-sol.txt /tmp/gen && rm /tmp/gen
      - name: Test extract_vocabulary tool
        run: |
          python tools/extract_vocabulary.py \
            -file /tmp/onmt.train.check.vocab.pt \
            -file_type field \
            -side src \
            -out_file /tmp/onmt.vocab.txt
          if ! wc -l /tmp/onmt.vocab.txt | grep -qF "1002"; then
            echo "wrong word count" && exit 1
          else
            echo "vocabulary extraction passed"
          fi
      - name: Test embeddings_to_torch tool
        run: |
          python tools/embeddings_to_torch.py \
            -emb_file_enc onmt/tests/sample_glove.txt \
            -emb_file_dec onmt/tests/sample_glove.txt \
            -dict_file /tmp/onmt.train.check.vocab.pt \
            -output_file /tmp/q_gloveembeddings \
            && rm /tmp/q_gloveembeddings*
          rm /tmp/onmt.train.check.*.pt
      - name: Test extract_embeddings tool
        run: |
          python tools/extract_embeddings.py \
            -model onmt/tests/test_model.pt
      - name: Test checkpoint vocabulary update
        run: |
          python train.py \
            -config data/data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -rnn_size 10 \
            -batch_size 10 \
            -word_vec_size 5 \
            -report_every 5 \
            -train_steps 10 \
            -save_model /tmp/onmt.model \
            -save_checkpoint_steps 10
          # Prepend a high-frequency token to the source vocabulary, then resume
          # training with -update_vocab to check that the checkpoint picks it up
          sed -i '1s/^/new_tok\t100000000\n/' /tmp/onmt.vocab.src
          python train.py \
            -config data/data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -rnn_size 10 \
            -batch_size 10 \
            -word_vec_size 5 \
            -report_every 5 \
            -train_steps 20 \
            -update_vocab \
            -reset_optim "states" \
            -train_from /tmp/onmt.model_step_10.pt
      - name: Test checkpoint vocabulary update with LM
        run: |
          python train.py \
            -config data/lm_data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -model_task lm \
            -encoder_type transformer_lm \
            -decoder_type transformer_lm \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -dec_layers 2 -batch_size 10 \
            -heads 4 -transformer_ff 64 \
            -word_vec_size 16 -report_every 5 \
            -save_model /tmp/lm.onmt.model \
            -save_checkpoint_steps 10 \
            -rnn_size 16 -train_steps 10
          sed -i '1s/^/new_tok\t100000000\n/' /tmp/onmt.vocab.src
          python train.py \
            -config data/lm_data.yaml \
            -src_vocab /tmp/onmt.vocab.src \
            -tgt_vocab /tmp/onmt.vocab.tgt \
            -model_task lm \
            -encoder_type transformer_lm \
            -decoder_type transformer_lm \
            -src_vocab_size 1000 \
            -tgt_vocab_size 1000 \
            -dec_layers 2 -batch_size 10 \
            -heads 4 -transformer_ff 64 \
            -word_vec_size 16 -report_every 5 \
            -rnn_size 16 -train_steps 20 \
            -update_vocab -reset_optim "states" \
            -train_from /tmp/lm.onmt.model_step_10.pt

  build-docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.6
        uses: actions/setup-python@v2
        with:
          python-version: "3.6"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade setuptools
          pip install -e .
          pip install -r docs/requirements.txt
      - name: Build docs
        run: |
          set -e
          # Check that the docs build without errors
          cd docs/ && make html && cd ..