File size: 5,666 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

r'''Generic Moses Wrapper

Run moses, wrapping various inputs and outputs
(useful as decoder-executable for mert-moses.pl)

mert-moses.pl \
  --decoder wrap_moses.py --input src --refs ref --config moses.ini \
  --decoder-flags="--wrap-input-file my_preproc_script.sh \
  --wrap-n-best-list my_postproc_script.sh"

Commands are run through shell, so they may contain multiple piped commands

Anything not in the following list is passed through to moses as decoder flags
'''

import argparse
import os
import shutil
import subprocess
import sys
import tempfile

# Default decoder binary: bin/moses under the directory two levels above the
# one holding this script (i.e. ../../bin/moses relative to that directory)
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_BASE_DIR = os.path.dirname(os.path.dirname(_SCRIPT_DIR))
MOSES = os.path.join(_BASE_DIR, 'bin', 'moses')


def popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE):
    '''Start cmd as a child process and return its Popen handle

    By default the command is run through the shell with both stdin and
    stdout connected to pipes, so the caller can stream data in and out.
    '''
    options = {'shell': shell, 'stdin': stdin, 'stdout': stdout}
    process = subprocess.Popen(cmd, **options)
    return process


def main():
    '''Parse wrapper options, run moses with the requested wrappers, clean up

    Options known to the parser (--moses, --tmp, --wrap-*) are consumed here;
    every other argument is forwarded to the moses executable.  When
    --wrap-input-file / --wrap-n-best-list are given, the corresponding file
    argument passed to moses is replaced by a file in a private temp
    directory, and the wrap command is run between that file and the
    user-supplied path.

    Exits with status 2 after printing usage when called without arguments,
    with status 1 on an argument error, and with a non-zero status when
    moses itself exits with a non-zero status.
    '''

    # Special args
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--moses', help='Moses executable (default: {})'.format(MOSES),
        default=MOSES)
    parser.add_argument(
        '--tmp', help='Temp directory parent (default: /tmp)', default='/tmp')
    parser.add_argument(
        '--wrap-input-file', metavar='CMD',
        help='Pipe input file through this command')
    parser.add_argument(
        '--wrap-n-best-list', metavar='CMD',
        help='Pipe n-best list through this command')
    parser.add_argument(
        '--wrap-stdin', metavar='CMD', help='Pipe stdin through this command')
    parser.add_argument(
        '--wrap-stdout', metavar='CMD', help='Pipe stdout through this command')

    # Help message
    if len(sys.argv) == 1:
        sys.stderr.write(__doc__)
        parser.print_help()
        sys.exit(2)

    # Parse/split args
    (args, moses_args) = parser.parse_known_args()
    moses_arg_set = set(moses_args)

    # Checked before anything is executed so that every code path reports a
    # missing binary with the same message
    if not os.path.exists(args.moses):
        sys.stderr.write(
            'Error: cannot find moses executable at "{}", '
            'specify with --moses\n'.format(args.moses))
        sys.exit(1)

    # If mert-moses.pl passes -show-weights, just call moses.  The child
    # inherits our stdout, so no bytes/str conversion is needed.
    if '--show-weights' in moses_arg_set or '-show-weights' in moses_arg_set:
        sys.exit(subprocess.call([args.moses] + moses_args))

    # Scan moses args and sanity check
    input_flags = ('--input-file', '-input-file', '-i')
    n_best_flags = ('--n-best-list', '-n-best-list')
    input_file = None
    input_file_i = None
    n_best_list = None
    n_best_list_i = None
    if args.wrap_input_file and args.wrap_stdin:
        sys.stderr.write(
            'Error: cannot use both --wrap-input-file and --wrap-stdin\n')
        sys.exit(1)
    if args.wrap_input_file:
        input_file_i = _flag_value_index(moses_args, input_flags)
        if input_file_i is None:
            sys.stderr.write(
                'Error: --wrap-input-file requires --input-file\n')
            sys.exit(1)
        input_file = moses_args[input_file_i]
    if args.wrap_n_best_list:
        n_best_list_i = _flag_value_index(moses_args, n_best_flags)
        if n_best_list_i is None:
            sys.stderr.write(
                'Error: --wrap-n-best-list requires --n-best-list\n')
            sys.exit(1)
        n_best_list = moses_args[n_best_list_i]
    # Don't read from stdin if input file specified
    stream_input = not any(flag in moses_arg_set for flag in input_flags)

    # Setup temp dir (dir= keeps a relative --tmp relative to the cwd)
    tmp = tempfile.mkdtemp(prefix='moses.', dir=args.tmp)
    try:
        # Moses command, with wrapped files redirected into the temp dir
        moses_cmd = moses_args[:]
        if args.wrap_input_file:
            moses_input_file = os.path.join(tmp, 'input_file')
            _run_filter(args.wrap_input_file, input_file, moses_input_file)
            moses_cmd[input_file_i] = moses_input_file
        moses_n_best_list = os.path.join(tmp, 'n_best_list')
        if args.wrap_n_best_list:
            moses_cmd[n_best_list_i] = moses_n_best_list
        moses_cmd = [args.moses] + moses_cmd

        # Start processes
        wrap_stdin = None
        moses_stdin = subprocess.PIPE
        if args.wrap_stdin:
            wrap_stdin = popen(args.wrap_stdin)
            moses_stdin = wrap_stdin.stdout
        wrap_stdout = None
        if args.wrap_stdout:
            # Wrap stdout
            moses = popen(moses_cmd, shell=False, stdin=moses_stdin)
            wrap_stdout = popen(
                args.wrap_stdout, stdin=moses.stdout, stdout=sys.stdout)
            # Drop our copy of the read end so moses gets SIGPIPE instead of
            # blocking forever if the stdout wrapper exits early
            moses.stdout.close()
        else:
            # Don't wrap stdout
            moses = popen(
                moses_cmd, shell=False, stdin=moses_stdin, stdout=sys.stdout)
        if wrap_stdin:
            # Same reasoning: only moses should hold this read end
            wrap_stdin.stdout.close()

        # Run pipeline
        feed = wrap_stdin.stdin if wrap_stdin else moses.stdin
        if stream_input:
            # The child pipes are binary, so on Python 3 read raw bytes from
            # the underlying buffer; Python 2's sys.stdin has no .buffer and
            # already yields byte strings
            source = getattr(sys.stdin, 'buffer', sys.stdin)
            while True:
                line = source.readline()
                if not line:
                    break
                feed.write(line)
                # Flush per line so the decoder sees input as it arrives
                feed.flush()
        feed.close()
        if wrap_stdin:
            wrap_stdin.wait()
        status = moses.wait()
        if wrap_stdout:
            wrap_stdout.wait()

        # Postprocess n-best list (only meaningful if moses succeeded)
        if status == 0 and args.wrap_n_best_list:
            _run_filter(args.wrap_n_best_list, moses_n_best_list, n_best_list)
    finally:
        # Cleanup, also when something above raised
        shutil.rmtree(tmp)

    # Report a decoder failure to the caller instead of exiting with 0
    if status != 0:
        sys.stderr.write(
            'Error: moses exited with status {}\n'.format(status))
        # A negative status means "killed by signal"; map it to plain failure
        sys.exit(status if status > 0 else 1)


def _flag_value_index(moses_args, flags):
    '''Return the index of the value following the first of flags, or None

    None is returned both when no flag is present and when the flag is the
    last argument and therefore has no value after it.
    '''
    for i, arg in enumerate(moses_args):
        if arg in flags and i + 1 < len(moses_args):
            return i + 1
    return None


def _run_filter(cmd, src_path, dst_path):
    '''Run shell command cmd reading src_path on stdin, writing dst_path

    The files are attached to the shell process itself rather than appended
    as "<src >dst" to the command text, so the redirection applies to a whole
    pipeline ("a | b") and paths need no shell quoting.  Returns the exit
    status of the command.
    '''
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        return subprocess.call(cmd, shell=True, stdin=src, stdout=dst)


# Run the wrapper only when this file is executed as a script, not when it is
# imported as a module
if __name__ == '__main__':
    main()