File size: 5,666 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
#!/usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
r'''Generic Moses Wrapper
Run moses, wrapping various inputs and outputs
(useful as decoder-executable for mert-moses.pl)
mert-moses.pl \
--decoder wrap_moses.py --input src --refs ref --config moses.ini \
--decoder-flags="--wrap-input-file my_preproc_script.sh \
--wrap-n-best-list my_postproc_script.sh"
Commands are run through shell, so they may contain multiple piped commands
Anything not in the following list is passed through to moses as decoder flags
'''
import argparse
import os
import shutil
import subprocess
import sys
import tempfile
# Default decoder location: bin/moses two directories above the one holding
# this script (i.e. ../../bin/moses relative to the script's directory).
MOSES = os.path.normpath(os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    os.pardir, os.pardir, 'bin', 'moses'))
def popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE):
    '''Start `cmd` without waiting for it and return its Popen handle.

    With the defaults the command is interpreted by the shell and both its
    stdin and stdout are pipes, so it can be chained with other processes.
    Pass a file object (or another process's pipe) as `stdin`/`stdout` to
    connect the command to it instead.
    '''
    options = {'shell': shell, 'stdin': stdin, 'stdout': stdout}
    return subprocess.Popen(cmd, **options)
# Spellings of the moses flags whose file argument can be wrapped
_INPUT_FILE_FLAGS = ('--input-file', '-input-file', '-i')
_N_BEST_LIST_FLAGS = ('--n-best-list', '-n-best-list')


def _byte_stream(stream):
    '''Return the binary layer of a standard stream.

    Python 3 text streams expose it as ``.buffer``; streams without that
    attribute (Python 2) are returned unchanged.
    '''
    return getattr(stream, 'buffer', stream)


def _flag_value_index(moses_args, flags):
    '''Return the index of the value following the first of `flags`.

    Returns None when none of `flags` is present, or when the flag is the
    last argument and therefore has no value.
    '''
    # The last element is skipped: a flag there has nothing after it
    for i, arg in enumerate(moses_args[:-1]):
        if arg in flags:
            return i + 1
    return None


def _run_file_filter(cmd, src, dst):
    '''Run shell command `cmd` reading file `src` and writing file `dst`.

    The files are attached to the shell itself rather than spliced into the
    command text, so the first command of a pipeline reads `src`, the last
    one writes `dst`, and file names never need shell quoting.

    Returns the exit status of the command.
    '''
    with open(src, 'rb') as fin, open(dst, 'wb') as fout:
        return subprocess.call(cmd, shell=True, stdin=fin, stdout=fout)


def _warn_if_failed(option, status):
    '''Report on stderr when the command given to `option` exited non-zero'''
    if status:
        sys.stderr.write(
            'Warning: {} command exited with status {}\n'.format(
                option, status))


def main():
    '''Run moses with optional shell filters around its inputs and outputs.

    Options known to this wrapper are consumed here; every other command
    line argument is passed to moses untouched.

    Exits with status 2 when called without arguments, 1 on usage errors,
    0 after answering --show-weights, and a non-zero status when moses
    itself fails.  Returns None after a successful run.
    '''
    # Special args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--moses', help='Moses executable (default: {})'.format(MOSES),
        default=MOSES)
    parser.add_argument(
        '--tmp', help='Temp directory parent (default: /tmp)', default='/tmp')
    parser.add_argument(
        '--wrap-input-file', metavar='CMD',
        help='Pipe input file through this command')
    parser.add_argument(
        '--wrap-n-best-list', metavar='CMD',
        help='Pipe n-best list through this command')
    parser.add_argument(
        '--wrap-stdin', metavar='CMD', help='Pipe stdin through this command')
    parser.add_argument(
        '--wrap-stdout', metavar='CMD', help='Pipe stdout through this command')

    # Help message
    if len(sys.argv) == 1:
        sys.stderr.write(__doc__)
        parser.print_help()
        sys.exit(2)

    # Parse/split args
    (args, moses_args) = parser.parse_known_args()
    moses_arg_set = set(moses_args)

    # If mert-moses.pl passes -show-weights, just call moses
    if '--show-weights' in moses_arg_set or '-show-weights' in moses_arg_set:
        # check_output returns bytes, so write through the binary layer
        out = _byte_stream(sys.stdout)
        out.write(subprocess.check_output([args.moses] + moses_args))
        out.flush()
        sys.exit(0)

    # Scan moses args and sanity check
    if not os.path.exists(args.moses):
        sys.stderr.write(
            'Error: cannot find moses executable at "{}", '
            'specify with --moses\n'.format(args.moses))
        sys.exit(1)
    if args.wrap_input_file and args.wrap_stdin:
        sys.stderr.write(
            'Error: cannot use both --wrap-input-file and --wrap-stdin\n')
        sys.exit(1)
    input_file = None
    input_file_i = None
    if args.wrap_input_file:
        input_file_i = _flag_value_index(moses_args, _INPUT_FILE_FLAGS)
        if input_file_i is None:
            sys.stderr.write(
                'Error: --wrap-input-file requires --input-file\n')
            sys.exit(1)
        input_file = moses_args[input_file_i]
    n_best_list = None
    n_best_list_i = None
    if args.wrap_n_best_list:
        n_best_list_i = _flag_value_index(moses_args, _N_BEST_LIST_FLAGS)
        if n_best_list_i is None:
            sys.stderr.write(
                'Error: --wrap-n-best-list requires --n-best-list\n')
            sys.exit(1)
        n_best_list = moses_args[n_best_list_i]

    # Don't read from stdin if input file specified
    stream_input = moses_arg_set.isdisjoint(_INPUT_FILE_FLAGS)

    # Setup temp dir (dir= keeps a relative --tmp relative to the cwd)
    tmp = tempfile.mkdtemp(prefix='moses.', dir=args.tmp)
    try:
        # Moses command, with wrapped files redirected into the temp dir
        moses_cmd = list(moses_args)
        if args.wrap_input_file:
            # Preprocess input
            moses_input_file = os.path.join(tmp, 'input_file')
            _warn_if_failed('--wrap-input-file', _run_file_filter(
                args.wrap_input_file, input_file, moses_input_file))
            moses_cmd[input_file_i] = moses_input_file
        moses_n_best_list = os.path.join(tmp, 'n_best_list')
        if args.wrap_n_best_list:
            moses_cmd[n_best_list_i] = moses_n_best_list
        moses_cmd = [args.moses] + moses_cmd

        # Start processes
        wrap_stdin = None
        moses_stdin = subprocess.PIPE
        if args.wrap_stdin:
            wrap_stdin = popen(args.wrap_stdin)
            moses_stdin = wrap_stdin.stdout
        wrap_stdout = None
        if args.wrap_stdout:
            # Wrap stdout
            moses = popen(moses_cmd, shell=False, stdin=moses_stdin)
            wrap_stdout = popen(
                args.wrap_stdout, stdin=moses.stdout, stdout=sys.stdout)
            # Drop our copy of the read end so moses gets SIGPIPE instead
            # of blocking forever if the stdout wrapper exits early
            moses.stdout.close()
        else:
            # Don't wrap stdout
            moses = popen(
                moses_cmd, shell=False, stdin=moses_stdin, stdout=sys.stdout)
        if wrap_stdin:
            # Same reasoning: only moses should hold this read end
            wrap_stdin.stdout.close()

        # Run pipeline
        feed = wrap_stdin.stdin if wrap_stdin else moses.stdin
        if stream_input:
            # Pipes are binary, so copy bytes; flush per line so the
            # decoder can be used interactively
            source = _byte_stream(sys.stdin)
            for line in iter(source.readline, b''):
                feed.write(line)
                feed.flush()
        feed.close()
        if wrap_stdin:
            _warn_if_failed('--wrap-stdin', wrap_stdin.wait())
        moses.wait()
        if wrap_stdout:
            _warn_if_failed('--wrap-stdout', wrap_stdout.wait())

        # Let the caller (e.g. mert-moses.pl) see a failed decoder run
        if moses.returncode:
            sys.stderr.write(
                'Error: moses exited with status {}\n'.format(
                    moses.returncode))
            # A negative code means killed by a signal; report it as 1
            sys.exit(moses.returncode if moses.returncode > 0 else 1)

        # Postprocess n-best list
        if args.wrap_n_best_list:
            _warn_if_failed('--wrap-n-best-list', _run_file_filter(
                args.wrap_n_best_list, moses_n_best_list, n_best_list))
    finally:
        # Cleanup, also on errors and sys.exit
        shutil.rmtree(tmp)
# Run the wrapper only when executed as a script, not when imported
if __name__ == '__main__':
    main()
|