# experiment.meta: now with comments. [CORPUS] multiple get-corpus in: get-corpus-script out: raw-stem default-name: corpus/txt rerun-on-change: input-extension output-extension template: IN OUT $input-extension $output-extension pre-tok-clean in: raw-stem out: pre-tok-cleaned default-name: corpus/pre-tok-cleaned pass-unless: pre-tok-clean template: $pre-tok-clean IN $input-extension $output-extension OUT OUT.lines-retained parallelizable: yes tokenize in: pre-tok-cleaned out: tokenized-stem default-name: corpus/tok pass-unless: input-tokenizer output-tokenizer template-if: input-tokenizer IN.$input-extension OUT.$input-extension template-if: output-tokenizer IN.$output-extension OUT.$output-extension parallelizable: yes clean in: tokenized-stem out: clean-stem default-name: corpus/clean ignore-if: cleaner rerun-on-change: max-sentence-length $moses-script-dir/training/clean-corpus-n.perl template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained error: there is a blank factor error: is too long! at custom-clean in: tokenized-stem out: clean-stem default-name: corpus/clean ignore-unless: cleaner rerun-on-change: max-sentence-length cleaner template: $cleaner IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained error: there is a blank factor error: is too long! 
at parse in: clean-stem out: parsed-stem default-name: corpus/parsed pass-unless: input-parser output-parser template-if: input-parser IN.$input-extension OUT.$input-extension template-if: output-parser IN.$output-extension OUT.$output-extension parallelizable: yes post-parse-clean in: parsed-stem out: clean-parsed-stem default-name: corpus/parsed-clean pass-unless: input-parser output-parser template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 OUT.lines-retained --ignore-xml error: there is a blank factor factorize in: clean-parsed-stem out: factorized-stem rerun-on-change: TRAINING:input-factors TRAINING:output-factors default-name: corpus/factored pass-unless: TRAINING:input-factors pass-if: factorize-after-split parallelizable: yes error: can't open error: incompatible number of words in factor truecase in: factorized-stem TRUECASER:truecase-model out: truecased-stem rerun-on-change: input-truecaser output-truecaser default-name: corpus/truecased pass-unless: input-truecaser output-truecaser template-if: input-truecaser IN.$input-extension OUT.$input-extension -model IN1.$input-extension template-if: output-truecaser IN.$output-extension OUT.$output-extension -model IN1.$output-extension parallelizable: yes source-label in: truecased-stem out: source-labelled default-name: corpus/labelled pass-unless: source-labeller template-if: source-labeller IN.$input-extension OUT.$input-extension template-if: cat IN.$output-extension OUT.$output-extension parallelizable: yes lowercase in: source-labelled out: lowercased-stem default-name: corpus/lowercased pass-unless: input-lowercaser output-lowercaser template-if: input-lowercaser IN.$input-extension OUT.$input-extension template-if: output-lowercaser IN.$output-extension OUT.$output-extension parallelizable: yes split in: lowercased-stem SPLITTER:splitter-model out: split-stem default-name: corpus/split pass-unless: input-splitter output-splitter template-if: 
input-splitter IN.$input-extension OUT.$input-extension -model IN1.$input-extension template-if: output-splitter IN.$output-extension OUT.$output-extension -model IN1.$output-extension post-split-clean in: split-stem out: clean-split-stem default-name: corpus/split-clean ignore-if: input-parser output-parser pass-unless: input-splitter output-splitter template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained error: there is a blank factor post-split-clean-syntax in: split-stem out: clean-split-stem default-name: corpus/split-clean ignore-unless: input-parser output-parser pass-unless: input-splitter output-splitter template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 OUT.lines-retained --ignore-xml error: there is a blank factor post-split-factorize in: clean-split-stem out: post-split-factorized-stem rerun-on-change: TRAINING:input-factors TRAINING:output-factors default-name: corpus/split-factored pass-unless: factorize-after-split parallelizable: yes error: can't open error: incompatible number of words in factor [RECASING] single tokenize in: raw out: tokenized default-name: recasing/cased pass-unless: output-tokenizer template: $output-tokenizer < IN > OUT train in: tokenized out: recase-config template: $moses-script-dir/recaser/train-recaser.perl -train-script $TRAINING:script -dir OUT.model -corpus IN -scripts-root-dir $moses-script-dir -config OUT $recasing-settings default-name: recasing/moses.ini tmp-name: recasing/model ignore-unless: EVALUATION:recaser error: cannot execute binary file [TRUECASER] single consolidate in: CORPUS:clean-parsed-stem out: tokenized-stem default-name: truecaser/corpus pass-unless: trainer template: $moses-script-dir/ems/support/consolidate-training-data.perl $input-extension $output-extension OUT IN error: number of lines don't match train-input in: tokenized-stem out: truecase-model 
rerun-on-change: trainer pass-unless: trainer ignore-if: output-truecaser ignore-unless: input-truecaser default-name: truecaser/truecase-model template: $trainer -model OUT.$input-extension -corpus IN.$input-extension train-output in: tokenized-stem out: truecase-model rerun-on-change: trainer pass-unless: trainer ignore-if: input-truecaser ignore-unless: output-truecaser default-name: truecaser/truecase-model template: $trainer -model OUT.$output-extension -corpus IN.$output-extension train in: tokenized-stem out: truecase-model rerun-on-change: trainer pass-unless: trainer ignore-unless: AND input-truecaser output-truecaser default-name: truecaser/truecase-model template: $trainer -model OUT.$input-extension -corpus IN.$input-extension ; $trainer -model OUT.$output-extension -corpus IN.$output-extension [SPLITTER] single consolidate in: CORPUS:lowercased-stem out: truecased-stem default-name: splitter/corpus ignore-unless: input-splitter output-splitter template: $moses-script-dir/ems/support/consolidate-training-data.perl $input-extension $output-extension OUT IN train in: truecased-stem out: splitter-model default-name: splitter/split-model ignore-unless: input-splitter output-splitter ignore-if: no-splitter-training [LM] multiple prepare-bilingual-lm in: TRAINING:corpus TRAINING:word-alignment out: numberized_ngrams ignore-unless: bilingual-lm rerun-on-change: TRAINING:corpus TRAINING:word-alignment source-window order epochs default-name: lm/blm train-bilingual-lm in: numberized_ngrams TRAINING:corpus out: binlm ignore-unless: bilingual-lm rerun-on-change: numberized_ngrams default-name: lm/blm train-nplm in: stripped-corpus out: binlm ignore-unless: nplm rerun-on-change: stripped-corpus nplm-settings default-name: lm/nplm get-corpus in: get-corpus-script out: raw-corpus pass-unless: get-corpus-script default-name: lm/txt template: $get-corpus-script > OUT use-parallel-corpus in: parallel-corpus-stem out: tokenized-corpus default-name: lm/tok ignore-unless: 
parallel-corpus-stem template: ln -s IN.$output-extension OUT error: failed to create symbolic link tokenize in: raw-corpus out: tokenized-corpus default-name: lm/tok pass-unless: output-tokenizer ignore-if: parallel-corpus-stem concatenate-files concatenate-files-split template: $output-tokenizer < IN > OUT parallelizable: yes mock-parse in: tokenized-corpus out: mock-parsed-corpus default-name: lm/mock-parsed pass-unless: mock-output-parser-lm ignore-if: concatenate-files concatenate-files-split template: $mock-output-parser-lm < IN > OUT factorize in: mock-parsed-corpus out: factorized-corpus default-name: lm/factored pass-unless: factors pass-if: factorize-after-split ignore-if: concatenate-files concatenate-files-split parallelizable: yes error: can't open error: incompatible number of words in factor lowercase in: factorized-corpus out: lowercased-corpus default-name: lm/lowercased pass-unless: output-lowercaser ignore-if: output-truecaser concatenate-files concatenate-files-split #only-factor-0: yes template: $output-lowercaser < IN > OUT parallelizable: yes truecase in: factorized-corpus TRUECASER:truecase-model out: lowercased-corpus rerun-on-change: output-truecaser default-name: lm/truecased ignore-unless: output-truecaser ignore-if: concatenate-files concatenate-files-split only-factor-0: yes template: $output-truecaser -model IN1.$output-extension < IN > OUT parallelizable: yes split in: lowercased-corpus SPLITTER:splitter-model out: split-corpus rerun-on-change: output-splitter default-name: lm/split pass-unless: output-splitter ignore-if: concatenate-files concatenate-files-split template: $output-splitter -model IN1.$output-extension < IN > OUT post-split-factorize in: split-corpus out: split-factorized-corpus default-name: lm/split-factored rerun-on-change: TRAINING:input-factors TRAINING:output-factors pass-unless: factorize-after-split ignore-if: concatenate-files parallelizable: yes error: can't open error: incompatible number of words in factor 
strip in: split-factorized-corpus out: stripped-corpus default-name: lm/stripped pass-unless: mock-output-parser-lm ignore-if: concatenate-files template: $moses-script-dir/training/strip-xml.perl < IN > OUT concatenate-split in: concatenate-files-split out: split-corpus ignore-unless: concatenate-files-split default-name: lm/split template: cat IN > OUT concatenate in: concatenate-files out: stripped-corpus ignore-unless: concatenate-files default-name: lm/stripped template: cat IN > OUT train in: stripped-corpus out: lm default-name: lm/lm ignore-if: rlm-training custom-training bilingual-lm nplm rerun-on-change: lm-training order settings template: $lm-training -order $order $settings -text IN -lm OUT error: cannot execute binary file error: unrecognised option not-error: BadDiscountException not-error: To override this error train-custom in: stripped-corpus out: binlm default-name: lm/custom-lm rerun-on-change: custom-training ignore-unless: AND custom-training config-feature-line config-weight-line ignore-if: syntactic template: $custom-training -text IN -lm OUT final-model: yes train-custom-syntax in: split-factorized-corpus out: binlm default-name: lm/custom-lm rerun-on-change: custom-training ignore-unless: AND custom-training config-feature-line config-weight-line syntactic mock-output-parser-lm template: $custom-training -text IN -lm OUT final-model: yes randomize in: lm out: rlm default-name: lm/rlm pass-unless: lm-randomizer ignore-if: rlm-training train-randomized in: stripped-corpus out: rlm default-name: lm/rlm ignore-unless: rlm-training rerun-on-change: rlm-training order quantize in: rlm out: qlm pass-unless: lm-quantizer default-name: lm/qlm template: $lm-quantizer IN OUT binarize in: qlm out: binlm pass-unless: lm-binarizer ignore-if: bilingual-lm nplm rerun-on-change: lm default-name: lm/binlm template: $lm-binarizer IN OUT error: set KENLM_MAX_ORDER to at least this value final-model: yes [INTERPOLATED-LM] single tuning-from-sgm in: tuning-sgm 
out: raw-tuning default-name: lm/interpolate-tuning.txt template: $moses-script-dir/ems/support/input-from-sgm.perl < IN > OUT tokenize-tuning in: raw-tuning out: tokenized-tuning default-name: lm/interpolate-tuning.tok pass-unless: output-tokenizer template: $output-tokenizer < IN > OUT parallelizable: yes mock-parse-tuning in: tokenized-tuning out: mock-parsed-tuning default-name: lm/interpolate-tuning.mock-parsed pass-unless: mock-output-parser-lm template: $mock-output-parser-lm < IN > OUT factorize-tuning in: mock-parsed-tuning out: factorized-tuning default-name: lm/interpolate-tuning.factored pass-unless: TRAINING:output-factors pass-if: factorize-after-split parallelizable: yes error: can't open error: incompatible number of words in factor lowercase-tuning in: factorized-tuning out: lowercased-tuning default-name: lm/interpolate-tuning.lowercased pass-unless: output-lowercaser ignore-if: output-truecaser template: $output-lowercaser < IN > OUT truecase-tuning in: factorized-tuning TRUECASER:truecase-model out: lowercased-tuning rerun-on-change: output-truecaser default-name: lm/interpolate-tuning.truecased ignore-unless: output-truecaser template: $output-truecaser -model IN1.$output-extension < IN > OUT split-tuning in: lowercased-tuning SPLITTER:splitter-model out: split-tuning rerun-on-change: output-splitter default-name: lm/interpolate-tuning.split pass-unless: output-splitter template: $output-splitter -model IN1.$output-extension < IN > OUT post-split-factorize-tuning in: split-tuning out: post-split-factorized-tuning default-name: lm/interpolate-tuning.split-factored rerun-on-change: TRAINING:input-factors TRAINING:output-factors pass-unless: factorize-after-split parallelizable: yes error: can't open error: incompatible number of words in factor strip-tuning in: post-split-factorized-tuning out: stripped-tuning default-name: lm/interpolate-tuning.stripped pass-unless: mock-output-parser-lm template: $moses-script-dir/training/strip-xml.perl < IN > 
OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees interpolate in: script stripped-tuning LM:lm rerun-on-change: srilm-dir group weights out: lm default-name: lm/interpolated-lm randomize in: lm out: rlm pass-unless: lm-randomizer default-name: lm/interpolated-rlm quantize in: rlm out: qlm pass-unless: lm-quantizer default-name: lm/interpolated-qlm binarize in: qlm out: binlm pass-unless: lm-binarizer ignore-unless: script rerun-on-change: lm default-name: lm/interpolated-binlm error: set kMaxOrder to at least this value final-model: yes [MML] single tokenize-indomain-source in: raw-indomain-source out: tokenized-indomain-source default-name: mml/indomain-source.tok pass-unless: input-tokenizer template: $input-tokenizer < IN > OUT parallelizable: yes factorize-indomain-source in: tokenized-indomain-source out: factorized-indomain-source rerun-on-change: TRAINING:input-factors default-name: mml/indomain-source.factored pass-unless: factors parallelizable: yes error: can't open error: incompatible number of words in factor lowercase-indomain-source in: factorized-indomain-source out: lowercased-indomain-source default-name: mml/indomain-source.lowercased pass-unless: input-lowercaser ignore-if: input-truecaser only-factor-0: yes template: $input-lowercaser < IN > OUT parallelizable: yes truecase-indomain-source in: factorized-indomain-source TRUECASER:truecase-model out: lowercased-indomain-source rerun-on-change: input-truecaser default-name: mml/indomain-source.truecased ignore-unless: input-truecaser only-factor-0: yes template: $input-truecaser -model IN1.$input-extension < IN > OUT parallelizable: yes split-indomain-source in: lowercased-indomain-source SPLITTER:splitter-model out: indomain-source rerun-on-change: input-splitter default-name: mml/indomain-source.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT tokenize-indomain-target in: raw-indomain-target out: 
tokenized-indomain-target default-name: mml/indomain-target.tok pass-unless: output-tokenizer template: $output-tokenizer < IN > OUT parallelizable: yes factorize-indomain-target in: tokenized-indomain-target out: factorized-indomain-target rerun-on-change: TRAINING:output-factors default-name: mml/indomain-target.factored pass-unless: factors parallelizable: yes error: can't open error: incompatible number of words in factor lowercase-indomain-target in: factorized-indomain-target out: lowercased-indomain-target default-name: mml/indomain-target.lowercased pass-unless: output-lowercaser ignore-if: output-truecaser only-factor-0: yes template: $output-lowercaser < IN > OUT parallelizable: yes truecase-indomain-target in: factorized-indomain-target TRUECASER:truecase-model out: lowercased-indomain-target rerun-on-change: output-truecaser default-name: mml/indomain-target.truecased ignore-unless: output-truecaser only-factor-0: yes template: $output-truecaser -model IN1.$output-extension < IN > OUT parallelizable: yes split-indomain-target in: lowercased-indomain-target SPLITTER:splitter-model out: indomain-target rerun-on-change: output-splitter default-name: mml/indomain-target.split pass-unless: output-splitter template: $output-splitter -model IN1.$output-extension < IN > OUT train in: indomain-stem outdomain-stem out: model rerun-on-change: settings ignore-unless: indomain-stem default-name: mml/model template: $moses-script-dir/ems/support/mml-train.perl -in-source IN.$input-extension -in-target IN.$output-extension -out-source IN1.$input-extension -out-target IN1.$output-extension -model OUT -lm-training "$lm-training" -order $order -lm-settings "$lm-settings" -lm-binarizer $lm-binarizer $settings train-in-mono in: indomain-source indomain-target outdomain-stem out: model rerun-on-change: settings ignore-if: indomain-stem default-name: mml/model template: $moses-script-dir/ems/support/mml-train.perl -in-source IN -in-target IN1 -out-source IN2.$input-extension 
-out-target IN2.$output-extension -model OUT -lm-training "$lm-training" -order $order -lm-settings "$lm-settings" -lm-binarizer $lm-binarizer $settings [TRAINING] single consolidate in: CORPUS:post-split-factorized-stem out: corpus default-name: corpus template: $moses-script-dir/ems/support/consolidate-training-data.perl $input-extension $output-extension OUT IN build-domains in: CORPUS:post-split-factorized-stem out: domains default-name: model/domains ignore-unless: domain-features mml-filter-corpora interpolated-operation-sequence-model template: $moses-script-dir/ems/support/build-domain-file-from-subcorpora.perl $input-extension IN > OUT final-model: yes mml-score in: MML:model corpus domains out: mml-scores ignore-unless: mml-before-wa mml-after-wa rerun-on-change: mml-filter-corpora default-name: training/corpus-mml-score template: $moses-script-dir/ems/support/mml-score.perl -model IN -corpus IN1 -domains IN2 -input-extension $input-extension -output-extension $output-extension -query $MML:lm-query -filter-domains "$mml-filter-corpora" > OUT mml-filter-before-wa in: corpus mml-scores domains out: corpus-mml-prefilter ignore-unless: mml-before-wa rerun-on-change: mml-filter-corpora mml-before-wa default-name: training/corpus-mml template: $moses-script-dir/ems/support/mml-filter.perl -in IN -out OUT -score IN1 -domain IN2 -input-extension $input-extension -output-extension $output-extension $mml-before-wa prepare-data-fast-align in: corpus-mml-prefilter=OR=corpus out: prepared-data-fast-align default-name: prepared fast-align in: prepared-data-fast-align out: fast-alignment rerun-on-change: fast-align-settings ignore-if: fast-align-max-lines fast-align-save-model template: $external-bin-dir/fast_align -i IN $fast-align-settings > OUT default-name: fast-align fast-align-inverse in: prepared-data-fast-align out: fast-alignment-inverse rerun-on-change: fast-align-settings ignore-if: fast-align-max-lines fast-align-save-model template: 
$external-bin-dir/fast_align -i IN -r $fast-align-settings > OUT default-name: fast-align-inverse fast-align-in-parts in: prepared-data-fast-align out: fast-alignment rerun-on-change: fast-align-settings fast-align-max-lines ignore-unless: fast-align-max-lines tmp-name: training/tmp.fast-align template: $moses-script-dir/ems/support/fast-align-in-parts.perl -bin $external-bin-dir/fast_align -i IN -max-lines $fast-align-max-lines -tmp TMP -settings '$fast-align-settings' -save-model '$?fast-align-save-model' -o OUT default-name: fast-align fast-align-in-parts-inverse in: prepared-data-fast-align out: fast-alignment-inverse rerun-on-change: fast-align-settings fast-align-max-lines ignore-unless: fast-align-max-lines tmp-name: training/tmp.fast-align-inverse template: $moses-script-dir/ems/support/fast-align-in-parts.perl -bin $external-bin-dir/fast_align -i IN -r -max-lines $fast-align-max-lines -tmp TMP -settings '$fast-align-settings' -save-model '$?fast-align-save-model' -o OUT default-name: fast-align-inverse fast-align-save-model in: prepared-data-fast-align out: fast-alignment ignore-unless: fast-align-save-model ignore-if: fast-align-max-lines default-name: fast-align tmp-name: training/tmp.fast-align template: $external-bin-dir/fast_align -i IN $fast-align-settings -p OUT.parameters > OUT 2> OUT.log fast-align-save-model-inverse in: prepared-data-fast-align out: fast-alignment-inverse ignore-unless: fast-align-save-model ignore-if: fast-align-max-lines default-name: fast-align-inverse tmp-name: training/tmp.fast-align-inverse template: $external-bin-dir/fast_align -r -i IN $fast-align-settings -p OUT.parameters > OUT 2> OUT.log symmetrize-fast-align in: fast-alignment fast-alignment-inverse corpus-mml-prefilter=OR=corpus out: word-alignment ignore-unless: fast-align-settings rerun-on-change: alignment-symmetrization-method template: $moses-script-dir/ems/support/symmetrize-fast-align.perl IN IN1 IN2.$input-extension IN2.$output-extension OUT 
$alignment-symmetrization-method $moses-src-dir/bin/symal default-name: model/aligned prepare-data in: corpus-mml-prefilter=OR=corpus out: prepared-data rerun-on-change: alignment-factors training-options script baseline-alignment-model external-bin-dir ignore-if: use-berkeley default-name: prepared run-giza in: prepared-data out: giza-alignment ignore-if: use-berkeley rerun-on-change: giza-settings training-options script baseline-alignment-model external-bin-dir default-name: giza error: not found not-error: 0 not found run-giza-inverse in: prepared-data out: giza-alignment-inverse rerun-on-change: giza-settings training-options script baseline-alignment-model external-bin-dir ignore-if: use-berkeley default-name: giza-inverse error: not found not-error: 0 not found run-berkeley in: corpus-mml-prefilter out: berkeley-alignment ignore-unless: use-berkeley rerun-on-change: berkeley-train berkeley-jar berkeley-training-options default-name: berkeley template: $berkeley-train " $berkeley-java-options " $berkeley-jar IN OUT $input-extension $output-extension $berkeley-training-options not-error: 0 errors, process-berkeley in: corpus-mml-prefilter berkeley-alignment out: word-alignment default-name: model/aligned rerun-on-change: berkeley-process berkeley-jar berkeley-posterior berkeley-process-options ignore-unless: use-berkeley template: $berkeley-process " $berkeley-java-options " $berkeley-jar IN IN1 OUT $input-extension $output-extension $alignment-symmetrization-method $berkeley-posterior $berkeley-process-options not-error: 0 errors, symmetrize-giza in: giza-alignment giza-alignment-inverse out: word-alignment ignore-if: use-berkeley fast-align-settings rerun-on-change: alignment-symmetrization-method training-options script default-name: model/aligned error: skip=<[1-9] mml-filter-after-wa in: corpus-mml-prefilter=OR=corpus word-alignment mml-scores corpus-mml-prefilter=OR=domains out: corpus-mml-postfilter ignore-unless: mml-after-wa rerun-on-change: 
mml-filter-corpora mml-after-wa default-name: model/corpus-mml template: $moses-script-dir/ems/support/mml-filter.perl -in IN -out OUT -alignment IN1 -score IN2 -domain IN3 -input-extension $input-extension -output-extension $output-extension $mml-after-wa build-biconcor in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: biconcor-model default-name: model/biconcor ignore-unless: biconcor error: usage final-model: yes build-suffix-array in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: sigtest-filter-phrase-translation-table default-name: model/suffix-array ignore-unless: suffix-array error: usage build-lex-trans in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: lexical-translation-table rerun-on-change: translation-factors training-options script default-name: model/lex parse-relax in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: parse-relaxed-corpus default-name: model/parsed-relaxed pass-unless: input-parse-relaxer output-parse-relaxer template-if: input-parse-relaxer IN.$input-extension OUT.$input-extension template-if: output-parse-relaxer IN.$output-extension OUT.$output-extension pcfg-extract in: parse-relaxed-corpus out: pcfg default-name: model/pcfg ignore-unless: use-pcfg-feature rerun-on-change: use-pcfg-feature template: $moses-bin-dir/pcfg-extract < IN.$output-extension > OUT.$output-extension pcfg-score in: parse-relaxed-corpus pcfg out: scored-corpus default-name: model/scored-corpus pass-unless: use-pcfg-feature template: ln -s IN.$input-extension OUT.$input-extension ; $moses-bin-dir/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension build-osm in: corpus word-alignment out: osm-model ignore-unless: operation-sequence-model rerun-on-change: operation-sequence-model training-options script giza-settings operation-sequence-model-settings template: 
$moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --input-extension $input-extension --output-extension $output-extension $operation-sequence-model-settings default-name: model/OSM build-interpolated-osm in: corpus word-alignment domains out: osm-model ignore-unless: interpolated-operation-sequence-model rerun-on-change: interpolated-operation-sequence-model training-options script giza-settings operation-sequence-model-settings template: $moses-script-dir/OSM/OSM-Train.perl --corpus-f IN0.$input-extension --corpus-e IN0.$output-extension --alignment IN1.$alignment-symmetrization-method --order $operation-sequence-model-order --out-dir OUT --moses-src-dir $moses-src-dir --input-extension $input-extension --output-extension $output-extension $operation-sequence-model-settings --domain IN2 default-name: model/OSM build-transliteration-model in: corpus word-alignment out: transliteration-model ignore-unless: transliteration-module rerun-on-change: transliteration-module training-options script giza-settings default-name: model/Transliteration final-model: yes build-translit-table in: transliteration-model out: transliteration-table ignore-unless: in-decoding-transliteration rerun-on-change: in-decoding-transliteration transliteration-module default-name: model/transliteration-phrase-table template: $moses-script-dir/Transliteration/in-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN --input-extension $input-extension --output-extension $output-extension --transliteration-file $transliteration-file --out-file OUT extract-phrases in: corpus-mml-postfilter=OR=word-alignment scored-corpus out: extracted-phrases rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set 
extract-settings training-options script use-ghkm domain-features baseline-extract lexicalized-reordering pass-if: mmsapt only-existence-matters: domain-features default-name: model/extract build-reordering in: extracted-phrases out: reordering-table ignore-unless: lexicalized-reordering pass-if: mmsapt rerun-on-change: lexicalized-reordering reordering-factors default-name: model/reordering-table final-model: yes build-ttable in: extracted-phrases lexical-translation-table corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains out: phrase-translation-table rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script include-word-alignment-in-rules domain-features default-name: model/phrase-table ignore-if: suffix-array mmsapt final-model: yes build-mmsapt in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: sigtest-filter-phrase-translation-table ignore-unless: mmsapt default-name: model/phrase-table-mmsapt template: $moses-script-dir/training/build-mmsapt.perl --alignment IN.$alignment-symmetrization-method --corpus IN1 --f $input-extension --e $output-extension --dir OUT --settings '$mmsapt' final-model: yes custom-phrase-table-pruning in: phrase-translation-table out: sigtest-filter-phrase-translation-table ignore-unless: custom-phrase-table-pruning ignore-if: mmsapt template: $custom-phrase-table-pruning IN OUT default-name: model/phrase-table-pruned sigtest-filter-suffix-array in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: sigtest-filter-suffix-array default-name: training/corpus template: $salm-index IN.$input-extension ; \ mv IN.${input-extension}.id_voc OUT.${input-extension}.id_voc ; \ mv IN.${input-extension}.sa_corpus OUT.${input-extension}.sa_corpus ; \ mv IN.${input-extension}.sa_offset OUT.${input-extension}.sa_offset ; \ mv IN.${input-extension}.sa_suffix OUT.${input-extension}.sa_suffix ; \ $salm-index IN.$output-extension ; \ mv 
IN.${output-extension}.id_voc OUT.${output-extension}.id_voc ; \ mv IN.${output-extension}.sa_corpus OUT.${output-extension}.sa_corpus ; \ mv IN.${output-extension}.sa_offset OUT.${output-extension}.sa_offset ; \ mv IN.${output-extension}.sa_suffix OUT.${output-extension}.sa_suffix ignore-unless: sigtest-filter final-model: yes sigtest-filter-ttable in: phrase-translation-table sigtest-filter-suffix-array out: sigtest-filter-phrase-translation-table default-name: model/phrase-table-sigtest-filter pass-unless: sigtest-filter ignore-if: TRAINING:config custom-phrase-table-pruning final-model: yes sigtest-filter-reordering in: reordering-table sigtest-filter-suffix-array out: sigtest-filter-reordering-table default-name: model/reordering-table-sigtest-filter pass-unless: sigtest-filter ignore-if: TRAINING:config ignore-unless: lexicalized-reordering final-model: yes build-generation in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: generation-table rerun-on-change: generation-factors generation-type training-options script ignore-unless: generation-factors ignore-if: generation-corpus default-name: model/generation-table final-model: yes build-generation-custom in: generation-corpus out: generation-table rerun-on-change: generation-factors generation-type training-options script generation-corpus ignore-unless: AND generation-factors generation-corpus default-name: model/generation-table final-model: yes generation-prune in: generation-table out: generation-table-pruned rerun-on-change: TRAINING:prune-generation pass-unless: TRAINING:prune-generation ignore-unless: generation-factors default-name: model/generation-table-pruned final-model: yes template: $TRAINING:prune-generation IN OUT build-sparse in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: sparse ignore-unless: sparse-features rerun-on-change: sparse-features default-name: model/sparse-features template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension 
$output-extension OUT "$sparse-features" create-config in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table-pruned sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm out: config ignore-if: use-hiero thot rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature operation-sequence-model-load-method default-name: model/moses.ini error: Unknown option error: requires an argument final-model: yes binarize-config in: config out: bin-config pass-unless: binarize-all rerun-on-change: config default-name: model/moses.bin.ini template: $binarize-all IN OUT -Binarizer "$ttable-binarizer" final-model: yes hiero-compile-source-suffix-array in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: hiero-source-suffix-array ignore-unless: use-hiero default-name: hiero-model/f.sa.bin template: $hiero-decode-dir/compile_bin.py -s IN.$input-extension OUT hiero-compile-target in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus out: hiero-target-array ignore-unless: use-hiero default-name: hiero-model/e.bin template: $hiero-decode-dir/compile_bin.py IN.$output-extension OUT hiero-compile-alignment in: corpus-mml-postfilter=OR=word-alignment out: hiero-alignment-array ignore-unless: use-hiero default-name: hiero-model/a.bin template: $hiero-decode-dir/compile_bin.py -a IN.$alignment-symmetrization-method OUT hiero-compile-lex in: hiero-alignment-array hiero-source-suffix-array hiero-target-array out: hiero-lex-array ignore-unless: use-hiero default-name: hiero-model/lex.bin template: $hiero-decode-dir/compile_bin.py -x IN1 IN2 IN OUT hiero-find-frequencies in: hiero-source-suffix-array out: hiero-topN ignore-unless: use-hiero 
default-name: hiero-model/f.topN template: $hiero-decode-dir/lcp_ops.py -t 4 IN | sort -nr | head -100 > OUT hiero-compile-precomputations in: hiero-topN hiero-source-suffix-array out: hiero-precomputation-array ignore-unless: use-hiero default-name: hiero-model/f.precomputations.bin rerun-on-change: hiero-max-phrase-length hiero-max-nonterminals hiero-max-phrase-span hiero-min-gap-length hiero-freq-rank1 hiero-freq-rank2 template: $hiero-decode-dir/compile_bin.py -r max-len=$hiero-max-phrase-length max-nt=$hiero-max-nonterminals max-size=$hiero-max-phrase-span min-gap=$hiero-min-gap-length rank1=$hiero-freq-rank1 rank2=$hiero-freq-rank2 sa=IN1 IN OUT hiero-create-config in: hiero-source-suffix-array hiero-target-array hiero-alignment-array hiero-lex-array hiero-precomputation-array LM:lm out: hiero-config ignore-unless: use-hiero rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors default-name: hiero-model/hiero.ini template: $hiero-util-dir/generate-ini.pl IN IN1 IN2 IN3 IN4 IN5 $hiero-max-phrase-length $hiero-max-nonterminals $hiero-max-phrase-span $hiero-min-gap-length $hiero-freq-rank1 $hiero-freq-rank2 < $GENERAL:hiero-template-ini > OUT thot-build-ttable in: corpus out: thot-ttable default-name: model/phrase-table-thot rerun-on-change: input-extension output-extension template: $thot/thot_tm_train -sdir $working-dir -s IN.$input-extension -t IN.$output-extension -o OUT thot-create-config in: thot-ttable LM:lm out: config ignore-unless: thot default-name: model/thot.ini template: $thot/thot_gen_cfg_file IN1/lm_desc IN/tm_desc > OUT [TUNING] single input-from-sgm in: input-sgm out: raw-input default-name: tuning/input.txt template: $moses-script-dir/ems/support/input-from-sgm.perl < IN > OUT input-devtest-from-sgm in: input-devtest-sgm out: raw-input-devtest default-name: tuning/input.devtest.txt ignore-unless: use-mira template: $moses-script-dir/ems/support/input-from-sgm.perl < IN > OUT tokenize-input 
in: raw-input out: tokenized-input default-name: tuning/input.tok pass-unless: input-tokenizer template: $input-tokenizer < IN > OUT tokenize-input-devtest in: raw-input-devtest out: tokenized-input-devtest default-name: tuning/input.devtest.tok pass-unless: input-tokenizer ignore-unless: use-mira template: $input-tokenizer < IN > OUT mock-parse-input in: tokenized-input out: mock-parsed-input default-name: tuning/input.mock-parsed pass-unless: mock-input-parser-devtesteval template: $mock-input-parser-devtesteval < IN > OUT mock-parse-input-devtest in: tokenized-input-devtest out: mock-parsed-input-devtest default-name: tuning/input.devtest.mock-parsed pass-unless: mock-input-parser-devtesteval ignore-unless: use-mira template: $mock-input-parser-devtesteval < IN > OUT parse-input in: mock-parsed-input out: parsed-input default-name: tuning/input.parsed pass-unless: input-parser pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval template: $input-parser < IN > OUT parse-input-devtest in: mock-parsed-input-devtest out: parsed-input-devtest default-name: tuning/input.devtest.parsed pass-unless: input-parser pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval ignore-unless: use-mira template: $input-parser < IN > OUT factorize-input in: parsed-input out: factorized-input default-name: tuning/input.factorized rerun-on-change: TRAINING:input-factors pass-unless: TRAINING:input-factors error: can't open error: incompatible number of words in factor factorize-input-devtest in: parsed-input-devtest out: factorized-input-devtest default-name: tuning/input.devtest.factorized rerun-on-change: TRAINING:input-factors pass-unless: TRAINING:input-factors ignore-unless: use-mira error: can't open error: incompatible number of words in factor source-label-input in: factorized-input out: source-labelled-input default-name: tuning/input.labelled pass-unless: source-labeller template-if: source-labeller IN OUT parallelizable: yes 
source-label-input-devtest in: factorized-input-devtest out: source-labelled-input-devtest default-name: tuning/input.devtest.labelled pass-unless: source-labeller template-if: source-labeller IN OUT parallelizable: yes lowercase-input in: source-labelled-input out: truecased-input default-name: tuning/input.lc pass-unless: input-lowercaser ignore-if: input-truecaser template: $input-lowercaser < IN > OUT lowercase-input-devtest in: source-labelled-input-devtest out: truecased-input-devtest default-name: tuning/input.devtest.lc pass-unless: input-lowercaser ignore-unless: use-mira ignore-if: input-truecaser template: $input-lowercaser < IN > OUT truecase-input in: source-labelled-input TRUECASER:truecase-model out: truecased-input rerun-on-change: input-truecaser default-name: tuning/input.tc ignore-unless: input-truecaser template: $input-truecaser -model IN1.$input-extension < IN > OUT truecase-input-devtest in: source-labelled-input-devtest TRUECASER:truecase-model out: truecased-input-devtest rerun-on-change: input-truecaser default-name: tuning/input.devtest.tc ignore-unless: AND input-truecaser use-mira template: $input-truecaser -model IN1.$input-extension < IN > OUT split-input in: truecased-input SPLITTER:splitter-model out: split-input rerun-on-change: input-splitter default-name: tuning/input.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT split-input-devtest in: truecased-input-devtest SPLITTER:splitter-model out: split-input-devtest rerun-on-change: input-splitter default-name: tuning/input.devtest.split pass-unless: input-splitter ignore-unless: use-mira template: $input-splitter -model IN1.$input-extension < IN > OUT parse-relax-input in: split-input out: input default-name: tuning/input.parse-relaxed pass-unless: input-parse-relaxer pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval template: $input-parse-relaxer < IN > OUT parse-relax-input-devtest in: split-input-devtest out: 
input-devtest default-name: tuning/input.devtest.parse-relaxed pass-unless: input-parse-relaxer pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval ignore-unless: use-mira template: $input-parse-relaxer < IN > OUT reference-from-sgm in: reference-sgm input-sgm out: raw-reference default-name: tuning/reference.txt template: $moses-script-dir/ems/support/reference-from-sgm.perl IN IN1 OUT reference-devtest-from-sgm in: reference-devtest-sgm input-devtest-sgm out: raw-reference-devtest default-name: tuning/reference.devtest.txt ignore-unless: use-mira template: $moses-script-dir/ems/support/reference-from-sgm.perl IN IN1 OUT tokenize-reference in: raw-reference out: tokenized-reference default-name: tuning/reference.tok pass-unless: output-tokenizer multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-tokenizer < IN > OUT tokenize-reference-devtest in: raw-reference-devtest out: tokenized-reference-devtest default-name: tuning/reference.devtest.tok pass-unless: output-tokenizer ignore-unless: use-mira multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-tokenizer < IN > OUT mock-parse-reference in: tokenized-reference out: mock-parsed-reference default-name: tuning/reference.mock-parsed pass-unless: mock-output-parser-references template: $mock-output-parser-references < IN > OUT mock-parse-reference-devtest in: tokenized-reference-devtest out: mock-parsed-reference-devtest default-name: tuning/reference.devtest.mock-parsed pass-unless: mock-output-parser-references template: $mock-output-parser-references < IN > OUT lowercase-reference in: mock-parsed-reference out: truecased-reference default-name: tuning/reference.lc pass-unless: output-lowercaser ignore-if: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-lowercaser < IN > OUT lowercase-reference-devtest in: mock-parsed-reference-devtest out: 
truecased-reference-devtest default-name: tuning/reference.devtest.lc pass-unless: output-lowercaser ignore-if: output-truecaser ignore-unless: use-mira multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-lowercaser < IN > OUT truecase-reference in: mock-parsed-reference TRUECASER:truecase-model out: truecased-reference rerun-on-change: output-truecaser default-name: tuning/reference.tc ignore-unless: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-truecaser -model IN1.$output-extension < IN > OUT truecase-reference-devtest in: mock-parsed-reference-devtest TRUECASER:truecase-model out: truecased-reference-devtest rerun-on-change: output-truecaser default-name: tuning/reference.devtest.tc ignore-unless: AND output-truecaser use-mira multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-truecaser -model IN1.$output-extension < IN > OUT split-reference in: truecased-reference SPLITTER:splitter-model out: split-ref default-name: tuning/reference.split pass-unless: output-splitter multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-splitter -model IN1.$output-extension < IN > OUT split-reference-devtest in: truecased-reference-devtest SPLITTER:splitter-model out: split-ref-devtest default-name: tuning/reference.devtest.split pass-unless: output-splitter ignore-unless: use-mira multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-splitter -model IN1.$output-extension < IN > OUT strip-reference in: split-ref out: reference default-name: tuning/reference.stripped pass-unless: mock-output-parser-references multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees strip-reference-devtest 
in: split-ref-devtest out: reference-devtest default-name: tuning/reference.devtest.stripped pass-unless: mock-output-parser-references ignore-unless: use-mira multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees filter in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table out: filtered-dir default-name: tuning/filtered rerun-on-change: filter-settings ttable-binarizer TRAINING:no-glue-grammar TRAINING:dont-tune-glue-grammar TRAINING:use-syntax-input-weight-feature TRAINING:config ignore-if: TRAINING:binarize-all TRAINING:mmsapt error: already exists. Please delete filter-devtest in: input-devtest TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table out: filtered-dir-devtest default-name: tuning/filtered.devtest rerun-on-change: filter-settings ttable-binarizer TRAINING:no-glue-grammar TRAINING:dont-tune-glue-grammar TRAINING:use-syntax-input-weight-feature TRAINING:config ignore-if: TRAINING:binarize-all ignore-unless: use-mira error: already exists. 
Please delete apply-filter in: TRAINING:bin-config filtered-dir out: filtered-config default-name: tuning/moses.filtered.ini ignore-if: TRAINING:binarize-all TRAINING:mmsapt template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT apply-filter-devtest in: TRAINING:bin-config filtered-dir-devtest out: filtered-config-devtest default-name: tuning/moses.filtered.devtest.ini pass-if: TRAINING:binarize-all ignore-unless: use-mira template: $moses-script-dir/ems/support/substitute-filtered-tables.perl IN1/moses.ini < IN > OUT tune in: TRAINING:bin-config input reference filtered-config-devtest input-devtest reference-devtest filtered-config out: weight-config ignore-if: use-hiero qsub-script: yes default-name: tuning/moses.ini tmp-name: tuning/tmp final-model: yes rerun-on-change: decoder decoder-settings tuning-settings nbest lambda async not-error: trans: No such file or directory thot-tune in: TRAINING:config input reference out: config-with-reused-weights ignore-unless: thot tmp-name: tuning/thot.tmp default-name: tuning/thot.tuned.ini template: mkdir -p TMP/home ; mkdir -p TMP/tdir ; mkdir -p TMP/sdir ; HOME=TMP/home $thot/thot_smt_tune -tdir TMP/tdir -sdir TMP/sdir -c IN -s IN1 -t IN2 -o OUT apply-weights in: TRAINING:bin-config weight-config out: config-with-reused-weights ignore-if: use-hiero thot default-name: tuning/moses.tuned.ini template: $moses-script-dir/ems/support/substitute-weights.perl IN IN1 OUT error: cannot open hiero-tune in: TRAINING:hiero-config input reference out: hiero-weight-config ignore-unless: use-hiero qsub-script: yes default-name: hiero-tuning/mert rerun-on-change: nbest template: $hiero-mert --nbest $nbest --decoder $hiero-decoder --workdir OUT IN --source-file IN1 --ref-files "IN2*" --no-test hiero-apply-weights in: hiero-weight-config TRAINING:hiero-config out: hiero-config-with-reused-weights default-name: hiero-tuning/hiero.weight-reused.ini ignore-unless: use-hiero template: 
$hiero-util-dir/apply-weights.pl IN/best.weights < IN1 > OUT [EVALUATION] multiple input-from-sgm in: input-sgm out: raw-input ignore-unless: input-sgm default-name: evaluation/input.txt template: $moses-script-dir/ems/support/input-from-sgm.perl < IN > OUT get-input in: get-corpus-script out: raw-input ignore-if: input-sgm default-name: evaluation/input.txt template: IN OUT tokenize-input in: raw-input out: tokenized-input default-name: evaluation/input.tok pass-unless: input-tokenizer template: $input-tokenizer < IN > OUT mock-parse-input in: tokenized-input out: mock-parsed-input default-name: evaluation/input.mock-parsed pass-unless: mock-input-parser-devtesteval template: $mock-input-parser-devtesteval < IN > OUT factorize-input in: parsed-input out: factorized-input default-name: evaluation/input.factorized rerun-on-change: TRAINING:input-factors pass-unless: TRAINING:input-factors error: can't open error: incompatible number of words in factor source-label-input in: factorized-input out: source-labelled-input default-name: evaluation/input.labelled pass-unless: source-labeller template-if: source-labeller IN OUT parallelizable: yes lowercase-input in: source-labelled-input out: truecased-input default-name: evaluation/input.lc pass-unless: input-lowercaser ignore-if: input-truecaser template: $input-lowercaser < IN > OUT truecase-input in: source-labelled-input TRUECASER:truecase-model out: truecased-input default-name: evaluation/input.tc rerun-on-change: input-truecaser ignore-unless: input-truecaser template: $input-truecaser -model IN1.$input-extension < IN > OUT split-input in: truecased-input SPLITTER:splitter-model out: split-input default-name: evaluation/input.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT parse-input in: mock-parsed-input out: parsed-input default-name: evaluation/input.parsed pass-unless: input-parser pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval template: 
$input-parser < IN > OUT parse-relax-input in: split-input out: input default-name: evaluation/input.parse-relaxed pass-unless: input-parse-relaxer pass-if: skip-parse-input-devtesteval mock-input-parser-devtesteval template: $input-parse-relaxer < IN > OUT filter in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table out: filtered-dir default-name: evaluation/filtered rerun-on-change: filter-settings report-precision-by-coverage ttable-binarizer TRAINING:no-glue-grammar TRAINING:dont-tune-glue-grammar TRAINING:use-syntax-input-weight-feature TRAINING:config pass-if: TRAINING:binarize-all TRAINING:mmsapt ignore-if: use-hiero error: already exists. Please delete apply-filter in: filtered-dir TRAINING:config TUNING:config-with-reused-weights out: filtered-config default-name: evaluation/filtered.ini ignore-if: TRAINING:binarize-all TRAINING:mmsapt thot template: $moses-script-dir/ems/support/substitute-filtered-tables-and-weights.perl IN/moses.ini IN1 IN2 OUT decode in: TUNING:config-with-reused-weights input filtered-config out: system-output default-name: evaluation/output qsub-script: yes ignore-if: use-hiero thot rerun-on-change: decoder decoder-settings nbest report-segmentation report-precision-by-coverage analyze-search-graph wade TRAINING:post-decoding-transliteration error: Translation was not performed correctly not-error: trans: No such file or directory final-model: yes hiero-decode in: TUNING:hiero-config-with-reused-weights input out: system-output default-name: evaluation/output qsub-script: yes ignore-unless: use-hiero template: $hiero-parallelizer -e OUT.edir -r -- $hiero-decoder -c IN < IN1 > OUT rerun-on-change: hiero-decoder thot-filter in: TUNING:config-with-reused-weights input out: filtered-config ignore-unless: thot default-name: evaluation/filtered tmp-name: evaluation/filtered-tmp 
template: mkdir -p TMP/home ; mkdir -p TMP/tdir ; mkdir -p TMP/sdir ; HOME=TMP/home $thot/thot_prepare_sys_for_test -sdir TMP/sdir -tdir TMP/tdir -t IN1 -c IN/tuned_for_dev.cfg -o OUT ; cp OUT/lm/main/* OUT/lm thot-decode in: input filtered-config out: system-output ignore-unless: thot default-name: evaluation/output template: $thot/thot_decoder -sdir $working-dir -c IN1/test_specific.cfg -t IN > OUT not-error: Error in word penalty model file remove-markup in: system-output out: cleaned-output default-name: evaluation/cleaned pass-if: TRAINING:hierarchical-rule-set pass-unless: report-segmentation template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT post-decoding-transliteration in: cleaned-output system-output TRAINING:transliteration-model INTERPOLATED-LM:binlm=OR=LM:binlm out: transliterated-output default-name: evaluation/transliterated pass-unless: TRAINING:post-decoding-transliteration template: $moses-script-dir/Transliteration/post-decoding-transliteration.pl --moses-src-dir $moses-src-dir --external-bin-dir $external-bin-dir --transliteration-model-dir IN2 --input-extension $input-extension --output-extension $output-extension --language-model IN3 --input-file IN0 --output-file OUT --oov-file IN1.oov --decoder $decoder recase-output in: transliterated-output RECASING:recase-config out: recased-output default-name: evaluation/recased pass-unless: recaser ignore-if: output-truecaser template: $recaser -moses $RECASING:decoder -in IN -model IN1 > OUT detruecase-output in: transliterated-output out: recased-output default-name: evaluation/truecased ignore-unless: output-truecaser template: $detruecaser < IN > OUT detokenize-output in: recased-output out: detokenized-output default-name: evaluation/detokenized pass-unless: detokenizer template: $detokenizer < IN > OUT final-model: yes wrap in: detokenized-output out: wrapped-output default-name: evaluation/detokenized.sgm rerun-on-change: wrapping-frame use-hiero template: 
$wrapping-script $wrapping-frame < IN > OUT error: Use of uninitialized value in pattern match final-model: yes reference-from-sgm in: reference-sgm input-sgm out: raw-reference default-name: evaluation/reference.txt template: $moses-script-dir/ems/support/reference-from-sgm.perl IN IN1 OUT tokenize-reference in: raw-reference out: tokenized-reference default-name: evaluation/reference.tok pass-unless: output-tokenizer multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-tokenizer < IN > OUT mock-parse-reference in: tokenized-reference out: mock-parsed-reference default-name: evaluation/reference.mock-parsed pass-unless: mock-output-parser-references template: $mock-output-parser-references < IN > OUT lowercase-reference in: mock-parsed-reference out: lowercased-reference default-name: evaluation/reference.lowercased pass-unless: output-lowercaser pass-if: recaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-lowercaser < IN > OUT strip-reference in: lowercased-reference out: reference default-name: evaluation/reference pass-unless: mock-output-parser-references multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees wade in: filtered-dir truecased-input tokenized-reference alignment system-output out: wade-analysis default-name: evaluation/wade-analysis ignore-unless: wade rerun-on-change: wade template: $moses-script-dir/ems/support/run-wade.perl $wade IN IN1 IN2 IN3 IN4 OUT nist-bleu in: wrapped-output reference-sgm out: nist-bleu-score default-name: evaluation/nist-bleu ignore-unless: nist-bleu rerun-on-change: nist-bleu error: Illegal division by zero template: $nist-bleu -s $input-sgm -r IN1 -t IN > OUT final-model: yes nist-bleu-c in: wrapped-output reference-sgm out: nist-bleu-c-score default-name: 
evaluation/nist-bleu-c ignore-unless: nist-bleu-c rerun-on-change: nist-bleu-c error: Illegal division by zero template: $nist-bleu-c -c -s $input-sgm -r IN1 -t IN > OUT final-model: yes ibm-bleu in: wrapped-output reference-sgm out: ibm-bleu-score default-name: evaluation/ibm-bleu ignore-unless: ibm-bleu rerun-on-change: ibm-bleu template: $ibm-bleu -ci -s $input-sgm -r IN1 -t IN > OUT final-model: yes ibm-bleu-c in: wrapped-output reference-sgm out: ibm-bleu-c-score default-name: evaluation/ibm-bleu-c ignore-unless: ibm-bleu-c rerun-on-change: ibm-bleu-c template: $ibm-bleu-c -s $input-sgm -r IN1 -t IN > OUT final-model: yes bolt-bleu in: detokenized-output out: bolt-bleu-score default-name: evaluation/bolt-bleu ignore-unless: bolt-bleu rerun-on-change: bolt-bleu template: $bolt-bleu IN > OUT final-model: yes bolt-bleu-c in: detokenized-output out: bolt-bleu-c-score default-name: evaluation/bolt-bleu-c ignore-unless: bolt-bleu-c rerun-on-change: bolt-bleu-c template: $bolt-bleu-c IN > OUT final-model: yes multi-bleu in: transliterated-output tokenized-reference out: multi-bleu-score default-name: evaluation/multi-bleu ignore-unless: multi-bleu rerun-on-change: multi-bleu template: $multi-bleu IN1 < IN > OUT final-model: yes multi-bleu-c in: recased-output tokenized-reference out: multi-bleu-c-score default-name: evaluation/multi-bleu-c ignore-unless: multi-bleu-c rerun-on-change: multi-bleu-c template: $multi-bleu-c IN1 < IN > OUT final-model: yes multi-bleu-detok in: detokenized-output raw-reference out: multi-bleu-detok-score default-name: evaluation/multi-bleu-detok ignore-unless: multi-bleu-detok rerun-on-change: multi-bleu-detok template: $multi-bleu-detok IN1 < IN > OUT final-model: yes multi-bleu-c-detok in: detokenized-output raw-reference out: multi-bleu-c-detok-score default-name: evaluation/multi-bleu-c-detok ignore-unless: multi-bleu-c-detok rerun-on-change: multi-bleu-c-detok template: $multi-bleu-c-detok IN1 < IN > OUT final-model: yes sacre-bleu 
in: detokenized-output raw-reference out: sacre-bleu-score default-name: evaluation/sacre-bleu ignore-unless: sacre-bleu rerun-on-change: sacre-bleu template: $sacre-bleu IN1 < IN > OUT final-model: yes sacre-bleu-c in: detokenized-output raw-reference out: sacre-bleu-c-score default-name: evaluation/sacre-bleu-c ignore-unless: sacre-bleu-c rerun-on-change: sacre-bleu-c template: $sacre-bleu-c IN1 < IN > OUT final-model: yes ter in: wrapped-output reference-sgm out: ter-score default-name: evaluation/detokenized.sgm.TER ignore-unless: ter rerun-on-change: ter final-model: yes wer in: recased-output reference out: wer-score default-name: evaluation/wer ignore-unless: wer rerun-on-change: wer template: $wer IN IN1 > OUT final-model: yes meteor in: transliterated-output reference out: meteor-score default-name: evaluation/meteor ignore-unless: meteor rerun-on-change: meteor template: $meteor IN IN1 $meteor-params > OUT final-model: yes analysis in: recased-output reference input out: analysis default-name: evaluation/analysis ignore-if: report-precision-by-coverage ignore-unless: analysis rerun-on-change: analyze-search-graph analysis-coverage in: input TRAINING:corpus-mml-postfilter=OR=TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus TRAINING:sigtest-filter-phrase-translation-table out: analysis-coverage default-name: evaluation/analysis ignore-unless: AND analysis analyze-coverage rerun-on-change: score-settings final-model: yes analysis-precision in: recased-output reference input TRAINING:corpus-mml-postfilter=OR=TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus TRAINING:sigtest-filter-phrase-translation-table analysis-coverage out: analysis default-name: evaluation/analysis ignore-unless: AND analysis analyze-coverage report-precision-by-coverage rerun-on-change: precision-by-coverage-base final-model: yes [QUALITY-ESTIMATION] single tokenize-input in: raw-input out: tokenized-input default-name: quality-estimation/input.tok pass-unless: input-tokenizer template: 
$input-tokenizer < IN > OUT tokenize-input-devtest in: raw-input-devtest out: tokenized-input-devtest default-name: quality-estimation/input.devtest.tok pass-unless: input-tokenizer template: $input-tokenizer < IN > OUT lowercase-input in: tokenized-input out: truecased-input default-name: quality-estimation/input.lc pass-unless: input-lowercaser ignore-if: input-truecaser template: $input-lowercaser < IN > OUT lowercase-input-devtest in: tokenized-input-devtest out: truecased-input-devtest default-name: quality-estimation/input.devtest.lc pass-unless: input-lowercaser ignore-if: input-truecaser template: $input-lowercaser < IN > OUT truecase-input in: tokenized-input TRUECASER:truecase-model out: truecased-input rerun-on-change: input-truecaser default-name: quality-estimation/input.tc ignore-unless: input-truecaser template: $input-truecaser -model IN1.$input-extension < IN > OUT truecase-input-devtest in: tokenized-input-devtest TRUECASER:truecase-model out: truecased-input-devtest rerun-on-change: input-truecaser ignore-unless: input-truecaser default-name: quality-estimation/input.devtest.tc template: $input-truecaser -model IN1.$input-extension < IN > OUT split-input in: truecased-input SPLITTER:splitter-model out: split-input rerun-on-change: input-splitter default-name: quality-estimation/input.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT split-input-devtest in: truecased-input-devtest SPLITTER:splitter-model out: split-input-devtest rerun-on-change: input-splitter default-name: quality-estimation/input.devtest.split pass-unless: input-splitter template: $input-splitter -model IN1.$input-extension < IN > OUT tokenize-reference in: raw-reference out: tokenized-reference default-name: quality-estimation/reference.tok pass-unless: output-tokenizer multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-tokenizer < IN > OUT tokenize-reference-devtest in: 
raw-reference-devtest out: tokenized-reference-devtest default-name: quality-estimation/reference.devtest.tok pass-unless: output-tokenizer multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-tokenizer < IN > OUT lowercase-reference in: tokenized-reference out: truecased-reference default-name: quality-estimation/reference.lc pass-unless: output-lowercaser ignore-if: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-lowercaser < IN > OUT lowercase-reference-devtest in: tokenized-reference-devtest out: truecased-reference-devtest default-name: quality-estimation/reference.devtest.lc pass-unless: output-lowercaser ignore-if: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-lowercaser < IN > OUT truecase-reference in: tokenized-reference TRUECASER:truecase-model out: truecased-reference rerun-on-change: output-truecaser default-name: quality-estimation/reference.tc ignore-unless: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-truecaser -model IN1.$output-extension < IN > OUT truecase-reference-devtest in: tokenized-reference-devtest TRUECASER:truecase-model out: truecased-reference-devtest rerun-on-change: output-truecaser default-name: quality-estimation/reference.devtest.tc ignore-unless: output-truecaser multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl template: $output-truecaser -model IN1.$output-extension < IN > OUT decode in: TUNING:config-with-reused-weights split-input out: rich-output default-name: quality-estimation/output template: $decoder -v 0 -tt -f IN < IN1 > OUT error: Translation was not performed correctly not-error: trans: No such file or directory decode-devtest in: TUNING:config-with-reused-weights split-input-devtest out: rich-output-devtest default-name: quality-estimation/output-devtest 
template: $decoder -v 0 -tt -f IN < IN1 > OUT error: Translation was not performed correctly not-error: trans: No such file or directory remove-markup in: rich-output out: cleaned-output default-name: quality-estimation/tokenized-output template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT remove-markup-devtest in: rich-output-devtest out: cleaned-output-devtest default-name: quality-estimation/tokenized-output-devtest template: $moses-script-dir/ems/support/remove-segmentation-markup.perl < IN > OUT score-output in: cleaned-output truecased-reference out: scored-output default-name: quality-estimation/output-scored tmp-name: quality-estimation/ter template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT score-output-devtest in: cleaned-output-devtest truecased-reference-devtest out: scored-output-devtest default-name: quality-estimation/output-scored-devtest tmp-name: quality-estimation/ter-devtest template: mkdir TMP ; $moses-script-dir/ems/support/ter.perl $tercom IN IN1 TMP > OUT train in: input rich-output scored-output input-devtest rich-output-devtest scored-output-devtest out: quality-estimation-model default-name: quality-estimation/model template: $trainer --train-rich IN1 --train-ter IN2 --eval-rich IN4 --eval-ter IN5 --model OUT final-model: yes [REPORTING] single report in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:multi-bleu-detok-score EVALUATION:multi-bleu-c-detok-score EVALUATION:sacre-bleu-score EVALUATION:sacre-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model EVALUATION:wade-analysis out: report default-name: evaluation/report