|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Run Moses `score` jobs in parallel. |
|
|
|
This script is a replacement for `score-parallel.perl`. The two are similar, |
|
but there are differences in usage. In addition, this script can be called |
|
directly from Python code without the need to run it as a separate process. |
|
""" |
|
|
|
from __future__ import ( |
|
absolute_import, |
|
print_function, |
|
unicode_literals, |
|
) |
|
|
|
__metaclass__ = type |
|
|
|
from argparse import ArgumentParser |
|
from contextlib import contextmanager |
|
from datetime import datetime |
|
import errno |
|
import gzip |
|
from multiprocessing import Pool |
|
import os |
|
import os.path |
|
import pipes |
|
from shutil import rmtree |
|
from subprocess import check_call |
|
import sys |
|
import tempfile |
|
|
|
|
|
def get_unicode_type():
    """Pick the text (Unicode) string type for the running interpreter.

    :return: `str` on Python 3 and later; `unicode` on Python 2, where
        `str` is a byte string.
    """
    running_python3_or_later = sys.version_info.major > 2
    if running_python3_or_later:
        return str
    # Only evaluated on Python 2, where the `unicode` builtin exists.
    return unicode  # noqa: F821


# Text string type, resolved once at import time.
UNICODE_TYPE = get_unicode_type()
|
|
|
|
|
class CommandLineError(Exception):
    """The options given on the command line are invalid or unusable."""
|
|
|
|
|
class ProgramFailure(Exception):
    """An expected failure (not a bug), to be shown neatly to the user."""
|
|
|
|
|
def parse_args(argv=None):
    """Parse command line arguments, return as `Namespace`.

    :param argv: Optional list of argument strings to parse.  When left
        as `None`, argparse reads the process's own command line.  Passing
        a list lets other Python code drive this script without spawning
        a separate process.
    :return: The `argparse.Namespace` holding the parsed options.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        '--extract-file', '-e', metavar='PATH', required=True,
        help=(
            "Path to input file: extract file (e.g. 'extract.sorted.gz' or "
            "'extract.inv.sorted.gz'). Required."))
    parser.add_argument(
        '--lex-file', '-l', metavar='PATH', required=True,
        help=(
            "Path to input file: lex file (e.g. 'lex.f2e' or 'lex.e2f'). "
            "Required."))
    parser.add_argument(
        '--output', '-o', metavar='PATH', required=True,
        help=(
            "Write phrase table to file PATH (e.g. 'phrase-table.half.f2e' "
            "or 'phrase-table.half.e2f'). Required."))
    parser.add_argument(
        '--inverse', '-i', action='store_true',
        help="Inverse scoring. Defaults to direct scoring.")
    parser.add_argument(
        '--labels-file', '-L', metavar='PATH',
        help="Also write source labels to file PATH.")
    parser.add_argument(
        '--parts-of-speech', '-p', metavar='PATH',
        help="Also write parts-of-speech file to PATH.")
    parser.add_argument(
        '--flexibility-score', '-F', metavar='PATH',
        help="Path to the 'flexibility_score.py' script. Defaults to none.")
    parser.add_argument(
        '--hierarchical', '-H', action='store_true',
        help="Process hierarchical rules.")
    parser.add_argument(
        '--args', '-a', metavar='ARGUMENTS',
        help="Additional arguments for `score` and `flexibility_score`.")
    parser.add_argument(
        '--sort', '-s', action='store_true',
        help="Sort output file.")
    parser.add_argument(
        '--jobs', '-j', metavar='N', type=int, default=1,
        help="Run up to N jobs in parallel. Defaults to %(default)s.")
    parser.add_argument(
        '--score-exe', '-x', metavar='PROGRAM',
        help="Name of, or path to, the 'score' executable.")
    parser.add_argument(
        '--sort-command', '-S', metavar='COMMAND-LINE',
        help=(
            "Command line for sorting text files to standard output. "
            "Must support operation as a pipe, as well as input files named "
            "as command-line arguments."))
    parser.add_argument(
        '--gzip-command', '-z', metavar='PROGRAM',
        help="Path to a gzip or pigz executable.")
    parser.add_argument(
        '--verbose', '-v', action='store_true',
        help="Print what's going on.")
    parser.add_argument(
        '--debug', '-d', action='store_true',
        help="Don't delete temporary directories when done.")
    # `argv=None` makes argparse fall back to the real command line.
    return parser.parse_args(argv)
|
|
|
|
|
def normalize_path(optional_path=None):
    """Clean up a filesystem path, passing `None` through untouched.

    The path is normalised with `os.path.normpath`, which removes
    redundant separators, references to the current directory, and
    needless detours through parent directories.  Both forward slashes
    and backslashes are then rewritten to the native path separator.

    :param optional_path: A path string, or `None`.
    :return: `None` if no path was given; otherwise the cleaned-up path.
        A relative path stays relative and an absolute path stays
        absolute.
    """
    if optional_path is None:
        return None

    normalized = os.path.normpath(optional_path)
    for separator in ('/', '\\'):
        normalized = normalized.replace(separator, os.path.sep)
    return normalized
|
|
|
|
|
def quote(path):
    """Quote and escape a filename for use in a shell command.

    The Windows implementation is very limited and will break on anything
    more advanced than a space.

    :param path: Filename to embed in a shell command line.
    :return: The filename, quoted so that the shell treats it as a single
        word.
    """
    if os.name == 'posix':
        # Prefer `shlex.quote`: the `pipes` module was deprecated and then
        # removed in Python 3.13.  Only Python 2 needs the old location.
        try:
            from shlex import quote as shell_quote
        except ImportError:
            from pipes import quote as shell_quote
        return shell_quote(path)

    # Non-POSIX fallback: plain double quotes, no escaping.
    return '"%s"' % path
|
|
|
|
|
def sanitize_args(args):
    """Validate parsed options and fill in defaults that need a lookup.

    Works on `args` in place: checks the job count, locates sort, gzip
    and `score` programs where none were specified, and normalises all
    path options.

    :param args: Arguments namespace, as returned by `parse_args`.
    :raise CommandLineError: If the job count is below 1, or no sort or
        gzip command is set after the lookup.
    """
    if args.jobs < 1:
        raise CommandLineError("Number of parallel jobs must be 1 or more.")

    if args.sort_command is None:
        args.sort_command = find_first_executable(
            ['neandersort', 'gsort', 'sort'])
        if args.sort_command is None:
            raise CommandLineError(
                "No 'sort' command is available. "
                "Choose one using the --sort-command option.")

    if args.gzip_command is None:
        args.gzip_command = find_first_executable(['pigz', 'gzip'])
        if args.gzip_command is None:
            raise CommandLineError(
                "No 'gzip' or 'pigz' command is available. "
                "Choose one using the --gzip-command option.")

    if args.score_exe is None:
        # Search two levels up from this script, plus two of its
        # subdirectories, before falling back on the PATH.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        moses_dir = os.path.dirname(script_dir)
        search_dirs = [moses_dir] + [
            os.path.join(moses_dir, subdir)
            for subdir in ('phrase-extract', 'binaries')
        ]
        args.score_exe = find_first_executable(['score'], search_dirs)

    path_options = (
        'extract_file',
        'lex_file',
        'output',
        'labels_file',
        'parts_of_speech',
        'flexibility_score',
        'score_exe',
    )
    for option in path_options:
        setattr(args, option, normalize_path(getattr(args, option)))
|
|
|
|
|
def add_exe_suffix(program):
    """Return the platform's filename for an executable called `program`.

    :param program: Base name of the program.
    :return: `program` with `.exe` appended when `os.name` is `'nt'`;
        otherwise `program` as given.
    """
    suffix = '.exe' if os.name == 'nt' else ''
    return program + suffix
|
|
|
|
|
def find_executable(exe, extra_path=None):
    """Return full path to an executable of the given name, or `None`.

    If the given name is a qualified path to an executable, it will be
    returned unchanged.  A qualified path where no executable is found
    results in a `CommandLineError`.

    :param exe: Program name, or a path containing a path separator.
    :param extra_path: Optional sequence of directories to search before
        the directories listed in `$PATH`.
    :return: Path to the first matching executable file, or `None` if no
        searched directory contains one.
    :raise CommandLineError: If `exe` is a qualified path that is not an
        executable file.
    """
    if extra_path is None:
        extra_path = []

    if os.path.sep in exe:
        # Qualified path: use it as given, but insist that it works.
        if not os.path.isfile(exe) or not os.access(exe, os.X_OK):
            raise CommandLineError("Not an executable: '%s'." % exe)
        return exe

    # $PATH may be unset altogether; then only `extra_path` is searched.
    path_variable = os.getenv('PATH')
    if path_variable is None:
        path_dirs = []
    else:
        path_dirs = path_variable.split(os.pathsep)

    for directory in list(extra_path) + path_dirs:
        full_path = os.path.join(directory, exe)
        # Directories usually pass the X_OK check too, so require a file.
        if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
            return full_path
    return None
|
|
|
|
|
def find_first_executable(candidates, extra_path=None):
    """Locate the first of several alternative programs that is installed.

    Each candidate name gets the platform's executable suffix before it
    is looked up.

    :param candidates: Program names, in order of preference.
    :param extra_path: Optional directories to search ahead of `$PATH`.
    :return: Path of the first candidate that was found.
    :raise ProgramFailure: If none of `candidates` was found.
    """
    for program in candidates:
        located = find_executable(add_exe_suffix(program), extra_path)
        if located is None:
            continue
        return located

    names = ', '.join(candidates)
    raise ProgramFailure(
        "Could not find any of these executables in path: %s." % names)
|
|
|
|
|
def execute_shell(command, verbose=False):
    """Run the `command` string through the shell and wait for it.

    The child inherits this process's environment.  On POSIX systems
    `LC_ALL` is forced to `C`, so that tools such as sort behave
    predictably.

    The command goes through a full shell, with pipes, substitution and
    so on, so callers must quote or escape arguments where appropriate.

    :param command: Shell command line, as a text string.
    :param verbose: Print the command before running it.
    :raise subprocess.CalledProcessError: If the command exits with a
        nonzero status.
    """
    assert isinstance(command, UNICODE_TYPE), (
        "Wrong argument for execute_shell.")
    if verbose:
        print("Executing: %s" % command)

    environment = dict(os.environ)
    if os.name == 'posix':
        environment.update(LC_ALL='C')
    check_call(command, shell=True, env=environment)
|
|
|
|
|
@contextmanager
def tempdir(keep=False):
    """Context manager: temporary directory.

    Creates a fresh directory with `tempfile.mkdtemp` and yields its
    path.  On exit the directory and its contents are deleted, also when
    the `with` body raises, unless `keep` is true.

    :param keep: Leave the directory in place on exit (for debugging).
    """
    directory = tempfile.mkdtemp()
    try:
        yield directory
    finally:
        # Clean up on errors too; otherwise a failed run leaks its
        # temporary directory.
        if not keep:
            rmtree(directory)
|
|
|
|
|
def make_dirs(path):
    """Equivalent to `mkdir -p -- path`.

    Creates `path` and any missing parent directories.  An already
    existing directory is not an error.

    :param path: Directory to create.
    :raise OSError: If the directory cannot be created, including when
        `path` already exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as error:
        # EEXIST is also reported when a regular file is in the way; only
        # an existing directory counts as success, as with `mkdir -p`.
        if error.errno != errno.EEXIST or not os.path.isdir(path):
            raise
|
|
|
|
|
def open_file(path, mode='r'):
    """Open a file, which may be gzip-compressed.

    The file is opened in binary mode unless `mode` explicitly asks for
    binary or text.  `gzip.open` already defaults to binary, so this
    makes plain and compressed files yield the same kind of data (bytes)
    on every Python version.

    :param path: File to open.  A name ending in `.gz` is opened through
        `gzip`.
    :param mode: File mode, e.g. `'r'` or `'w'`.
    :return: An open file object.
    """
    if 'b' not in mode and 't' not in mode:
        mode += 'b'
    if path.endswith('.gz'):
        return gzip.open(path, mode)
    return open(path, mode)
|
|
|
|
|
def count_lines(filename):
    """Return how many lines `filename` holds (it may be gzip-compressed).

    :param filename: Path of the file to read, opened via `open_file`.
    :return: Number of lines, as an int.
    """
    with open_file(filename) as stream:
        return sum(1 for _line in stream)
|
|
|
|
|
def set_temp_dir():
    """Point `tempfile` at `$MOSES_TEMP_DIR`, if that variable is set.

    The directory is created if necessary.  Without the variable, the
    `tempfile` default is left alone.
    """
    configured_dir = os.getenv('MOSES_TEMP_DIR')
    if configured_dir is None:
        return
    make_dirs(configured_dir)
    tempfile.tempdir = configured_dir
|
|
|
|
|
def strip_newline(line):
    """Remove trailing carriage return and/or line feed, if present.

    Accepts both text and byte strings.  At most one line feed is
    removed, and then at most one carriage return.

    :param line: A line of text or bytes.
    :return: The line without its line terminator, in the same type.
    """
    # Use terminators of the line's own type: on Python 3, calling
    # `endswith` on bytes with a text argument raises TypeError.
    if isinstance(line, bytes):
        line_feed, carriage_return = b'\n', b'\r'
    else:
        line_feed, carriage_return = '\n', '\r'

    if line.endswith(line_feed):
        line = line[:-1]
    if line.endswith(carriage_return):
        line = line[:-1]
    return line
|
|
|
|
|
def open_chunk_file(split_dir, chunk_number):
    """Open, for writing, the file for one chunk of the extract file.

    :param split_dir: Directory in which to create the chunk.
    :param chunk_number: Number of the chunk, used in its file name.
    :return: Open gzip file `extract.<chunk_number>.gz` in `split_dir`.
    """
    chunk_name = 'extract.%d.gz' % chunk_number
    chunk_path = os.path.join(split_dir, chunk_name)
    return open_file(chunk_path, 'w')
|
|
|
|
|
def name_context_chunk_file(split_dir, chunk_number):
    """Build the file name for one chunk of the extract context file.

    :param split_dir: Directory that holds the chunk files.
    :param chunk_number: Number of the chunk.
    :return: Path `extract.context.<chunk_number>.gz` inside `split_dir`.
    """
    chunk_name = 'extract.context.%d.gz' % chunk_number
    return os.path.join(split_dir, chunk_name)
|
|
|
|
|
def extract_source_phrase(line):
    """Extract the source phrase from an extract-file line.

    The source phrase is everything before the first `|||` separator, or
    the whole line if there is no separator.  Works on both text and
    byte strings.

    :param line: One line of an extract file.
    :return: The source phrase, in the same string type as `line`.
    """
    # The separator must match the line's type: on Python 3, splitting
    # text on a bytes separator (or the reverse) raises TypeError.
    separator = b'|||' if isinstance(line, bytes) else '|||'
    return line.split(separator, 1)[0]
|
|
|
|
|
def cut_context_file(last_source_phrase, chunk_file, last_line,
                     context_stream):
    """Write one chunk of extract context file into its own file.

    Lines are copied from `context_stream` into a new gzip file, up to
    and including the run of lines whose source phrase equals
    `last_source_phrase`.  If that phrase never occurs (or is `None`),
    everything left in the stream is copied.

    All data is handled as UTF-8 bytes.  Text arguments and text lines
    are encoded first, so phrases compare reliably whichever string type
    the caller and the stream use.

    :param last_source_phrase: Last source phrase that should be in the
        chunk.  Stop processing after this source phrase.
    :param chunk_file: Path to the extract context file for this chunk.
    :param last_line: Previously read line that may still need writing.
    :param context_stream: Extract context file, opened for reading.
    :return: Last line read from `context_stream`, as bytes without its
        line terminator.  This line will still need processing.  `None`
        if the stream was exhausted.
    """
    def as_bytes(value):
        """Encode text as UTF-8; pass bytes and `None` through."""
        if value is None or isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    last_source_phrase = as_bytes(last_source_phrase)

    with gzip.open(chunk_file, 'wb') as chunk:
        if last_line is not None:
            # Left over from cutting the previous chunk.
            chunk.write(as_bytes(last_line) + b'\n')

        # Becomes true once we are inside the run of lines for
        # `last_source_phrase`; the chunk ends where that run ends.
        on_last_source_phrase = False

        for line in context_stream:
            line = as_bytes(line)
            # Drop the line terminator: one LF, then one CR.
            if line.endswith(b'\n'):
                line = line[:-1]
            if line.endswith(b'\r'):
                line = line[:-1]
            source_phrase = line.split(b'|||', 1)[0]

            if on_last_source_phrase and source_phrase != last_source_phrase:
                # First line of the next chunk.  Hand it back unwritten.
                return line

            chunk.write(line + b'\n')
            if source_phrase == last_source_phrase:
                on_last_source_phrase = True

    return None
|
|
|
|
|
def split_extract_files(split_dir, extract_file, extract_context_file=None,
                        jobs=1):
    """Split extract file into chunks, so we can process them in parallel.

    Chunks hold roughly `lines / jobs` lines each.  A chunk is only cut
    where the source phrase changes, so all lines for one source phrase
    land in the same chunk.  If an extract context file is given, it is
    cut at the same source phrases.

    :param split_dir: A temporary directory where this function can write
        temporary files.  The caller must ensure that this directory will
        be cleaned up after it's done with the files.
    :param extract_file: Path to the extract file (may be gzipped).
    :param extract_context_file: Optional path to the matching extract
        context file.
    :param jobs: Number of parallel jobs to split the work for.
    :return: An iterable of tuples.  Each tuple holds a partial extract
        file, and the corresponding context file (or `None`).  The files
        may be in `split_dir`, or there may just be the original extract
        file.
    """
    if jobs == 1:
        # Nothing to parallelise.  Use the original files as they are.
        return [(extract_file, extract_context_file)]

    files = []
    num_lines = count_lines(extract_file)
    # Round up.  Floor division keeps this an int on Python 3 as well.
    chunk_size = (num_lines + jobs - 1) // jobs

    line_count = 0
    chunk_number = 0
    prev_source_phrase = None
    last_line_context = None
    chunk_context_file = None
    context_stream = None
    chunk_file = None
    extract_stream = open_file(extract_file)
    try:
        chunk_file = open_chunk_file(split_dir, chunk_number)
        if extract_context_file is not None:
            context_stream = open_file(extract_context_file)

        for line in extract_stream:
            line_count += 1
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            line = strip_newline(line)
            # Encode once; the bytes serve for both the phrase comparison
            # and the binary chunk file.
            encoded_line = line.encode('utf-8')

            if line_count >= chunk_size:
                # The chunk is full, but it can only end on a change of
                # source phrase.
                source_phrase = extract_source_phrase(encoded_line)
                if prev_source_phrase is None:
                    # First line past the size limit: remember its phrase.
                    prev_source_phrase = source_phrase
                elif source_phrase != prev_source_phrase:
                    # The phrase changed.  Close this chunk, start another.
                    chunk_file.close()
                    if extract_context_file is not None:
                        chunk_context_file = name_context_chunk_file(
                            split_dir, chunk_number)
                        last_line_context = cut_context_file(
                            prev_source_phrase, chunk_context_file,
                            last_line_context, context_stream)
                    files.append((chunk_file.name, chunk_context_file))

                    prev_source_phrase = None
                    line_count = 0
                    chunk_number += 1
                    chunk_file = open_chunk_file(split_dir, chunk_number)

            chunk_file.write(encoded_line + b'\n')

        # Finish the final chunk.
        chunk_file.close()
        if extract_context_file is not None:
            chunk_context_file = name_context_chunk_file(
                split_dir, chunk_number)
            # Pass the chunk's path here, not its number.
            last_line_context = cut_context_file(
                prev_source_phrase, chunk_context_file, last_line_context,
                context_stream)
        files.append((chunk_file.name, chunk_context_file))
    finally:
        # Release the input streams and, after an error, any chunk that
        # is still open.  Closing an already closed file is harmless.
        extract_stream.close()
        if context_stream is not None:
            context_stream.close()
        if chunk_file is not None:
            chunk_file.close()

    return files
|
|
|
|
|
def compose_score_command(extract_file, context_file, half_file,
                          flex_half_file, args):
    """Compose command line text to run one instance of `score`.

    :param extract_file: One chunk of extract file.
    :param context_file: If doing flexibility scoring, one chunk of
        extract context file.  Otherwise, None.
    :param half_file: Output path passed to `score` as its third
        argument: this chunk's partial half phrase table.
    :param flex_half_file: Path that receives the compressed output of
        the flexibility score script.  Only used when `context_file` is
        given.
    :param args: Arguments namespace.
    :return: Shell command line, as a single string.
    """
    # Quote every path, so that spaces or shell metacharacters in file
    # names cannot break the command.
    command = [
        quote(args.score_exe),
        quote(extract_file),
        quote(args.lex_file),
        quote(half_file),
    ]
    # `build_score_args` already includes the user's `--args` text, so it
    # is not added separately here; that would pass it twice.
    other_args = build_score_args(args)
    if other_args != '':
        command.append(other_args)

    if context_file is not None:
        # Feed the scored chunk through the flexibility score script.
        # The chunk is decompressed with the gzip command, which is how
        # `merge_and_sort` reads the same half files.
        command += [
            '&&',
            quote(args.gzip_command),
            '-c',
            '-d',
            quote(half_file),
            '|',
            quote(args.flexibility_score),
            quote(context_file),
        ]
        if args.inverse:
            command.append('--Inverse')
        if args.hierarchical:
            command.append('--Hierarchical')
        command += [
            '|',
            quote(args.gzip_command),
            '-c',
            '>%s' % quote(flex_half_file),
        ]

    return ' '.join(command)
|
|
|
|
|
def score_parallel(split_dir, file_pairs, args):
    """Run the `score` command in parallel.

    :param split_dir: Temporary directory where we can create split files.
    :param file_pairs: Sequence of tuples for the input files, one tuple
        per chunk of the work.  Each tuple consists of a partial extract
        file, and optionally a partial extract context file.
    :param args: Arguments namespace.
    :return: A list of tuples.  Each tuple contains two file paths.  The
        first is for a partial half-phrase-table file.  The second is for
        the corresponding partial flex file, if a context file is given;
        or `None` otherwise.
    :raise Exception: Whatever a failed job raised in its worker process,
        for example `subprocess.CalledProcessError` when a command exits
        with a nonzero status.
    """
    partial_files = []
    pending_jobs = []

    pool = Pool(args.jobs)
    try:
        for chunk_num, file_pair in enumerate(file_pairs):
            half_file = os.path.join(
                split_dir, 'phrase-table.half.%06d.gz' % chunk_num)
            extract_file, context_file = file_pair
            if context_file is None:
                flex_half_file = None
            else:
                flex_half_file = os.path.join(
                    split_dir, 'phrase-table.half.%06d.flex.gz' % chunk_num)

            command_line = compose_score_command(
                extract_file, context_file, half_file, flex_half_file, args)
            pending_jobs.append(pool.apply_async(
                execute_shell, (command_line, ), {'verbose': args.verbose}))
            partial_files.append((half_file, flex_half_file))
        pool.close()

        # Collect every result.  `get` re-raises an exception from the
        # worker; without this, failed jobs would pass unnoticed and
        # later steps would merge incomplete output.
        for job in pending_jobs:
            job.get()
    except BaseException:
        pool.terminate()
        raise
    finally:
        pool.join()
    return partial_files
|
|
|
|
|
def merge_and_sort(files, output, sort_command=None, gzip_exe=None,
                   verbose=False):
    """Merge partial files.

    Decompresses all partial files, sorts their combined contents, and
    writes the gzip-compressed result to `output`, replacing any existing
    file of that name.

    :param files: List of partial half-phrase-table files.
    :param output: Path for resulting combined phrase-table file.
    :param sort_command: Command line for sorting; reads standard input
        and writes standard output.  The literal value `'neandersort'`
        selects a direct call that is given the files and the output
        path as arguments.
    :param gzip_exe: Path to the gzip (or compatible) executable.
    :param verbose: Print the shell command before running it.
    """
    if sort_command == 'neandersort':
        check_call([
            'neandersort',
            '-o', output,
        ] + files)
    else:
        # Truncate with ">" rather than append with ">>": otherwise
        # rerunning with an existing output file would add the new table
        # after the stale one.
        command = (
            "%(gzip)s -c -d %(files)s | "
            "%(sort)s | "
            "%(gzip)s -c >%(output)s"
            % {
                'gzip': quote(gzip_exe),
                'sort': sort_command,
                'files': ' '.join(map(quote, files)),
                'output': quote(output),
            })
        execute_shell(command, verbose=verbose)
|
|
|
|
|
def build_score_args(args):
    """Compose the option part of the command line for `score`.

    :param args: Arguments namespace.  Reads `labels_file`,
        `parts_of_speech`, `inverse` and `args`.
    :return: The options, joined by single spaces.  An empty string if
        there are none.
    """
    options = []
    if args.labels_file:
        options.extend([
            '--SourceLabels',
            '--SourceLabelCountsLHS',
            '--SourceLabelSet',
        ])
    if args.parts_of_speech:
        options.append('--PartsOfSpeech')
    if args.inverse:
        options.append('--Inverse')
    if args.args is not None:
        # User-supplied extra arguments are passed on verbatim.
        options.append(args.args)
    return ' '.join(options)
|
|
|
|
|
def list_existing(paths):
    """Return, in the same order, those of the given files which exist.

    :param paths: Iterable of filesystem paths.
    :return: A list of the paths for which `os.path.exists` is true.
    """
    # Build a real list: on Python 3 `filter` returns a lazy iterator
    # that can only be consumed once and has no length.
    return [path for path in paths if os.path.exists(path)]
|
|
|
|
|
def compose_coc_path_for(path):
    """Return the COC-file path that goes with file `path`.

    :param path: Path of a file.
    :return: `path` with `.coc` appended.
    """
    coc_template = '%s.coc'
    return coc_template % path
|
|
|
|
|
def read_cocs(path):
    """Load the COC file at `path`.

    :param path: File holding one integer per line.
    :return: The file's numbers, in order, as a tuple of ints.
    :raise ValueError: If a line does not parse as an integer.
    """
    counts = []
    with open(path) as lines:
        for line in lines:
            counts.append(int(line.rstrip('\r\n')))
    return tuple(counts)
|
|
|
|
|
def add_cocs(original, additional):
    """Add up two COC sequences position by position.

    Where one sequence is longer than the other, its extra entries are
    carried over unchanged.

    :param original: Sequence of numbers, or `None`.
    :param additional: Sequence of numbers, or `None`.
    :return: The other argument if one of them is `None`; otherwise a
        tuple of the position-wise sums, as long as the longer input.
    """
    assert not (original is None and additional is None), "No COCs to add!"
    if original is None:
        return additional
    if additional is None:
        return original

    overlap = min(len(original), len(additional))
    summed = tuple(
        original[index] + additional[index] for index in range(overlap))
    # At most one of the inputs extends beyond the overlap.
    if len(original) > overlap:
        remainder = original[overlap:]
    else:
        remainder = additional[overlap:]
    return summed + tuple(remainder)
|
|
|
|
|
def merge_coc(files, output):
    """Merge COC files for the given partial files.

    Each COC file is a series of integers, one per line.  The files are
    summed line-wise: the result's first line is the sum of all first
    lines, its second line the sum of all second lines, and so on.

    If the first partial file has no COC file, nothing is written.

    :param files: Non-empty list of two-element tuples.  The first
        element of each names the file whose COC file is to be merged.
    :param output: Path of the merged COC file to write.
    """
    assert len(files) > 0, "No partial files - no work to do."
    extract_files = [chunk for chunk, _unused in files]
    coc_paths = [compose_coc_path_for(chunk) for chunk in extract_files]
    if not os.path.exists(coc_paths[0]):
        # No COC file for the first chunk: nothing to merge.
        return

    totals = None
    for coc_path in list_existing(coc_paths):
        totals = add_cocs(totals, read_cocs(coc_path))

    with open(output, 'w') as output_stream:
        output_stream.writelines('%d\n' % entry for entry in totals)
|
|
|
|
|
def suffix_line_numbers(infile, outfile):
    """Rewrite `infile` to `outfile`; suffix line number to each line.

    The line number is zero-based, and separated from the rest of the line
    by a single space.

    The output is first written to a temporary file next to `outfile`,
    which is renamed into place once it is complete.

    :param infile: Path of the text file to read.
    :param outfile: Path of the numbered file to create.
    """
    temp_file = '%s.numbering' % outfile
    # Write to the temporary file, not to `outfile`: the rename below is
    # what puts the result in place.
    with open(infile, 'r') as instream, open(temp_file, 'w') as outstream:
        for line_no, line in enumerate(instream):
            # Strip the line terminator first, so that the number lands
            # on the same line as the text.
            outstream.write('%s %d\n' % (line.rstrip('\r\n'), line_no))
    os.rename(temp_file, outfile)
|
|
|
|
|
def compose_source_labels_path_for(path):
    """Return the source labels file path that goes with file `path`.

    :param path: Path of a file.
    :return: `path` with `.syntaxLabels.src` appended.
    """
    labels_template = '%s.syntaxLabels.src'
    return labels_template % path
|
|
|
|
|
def merge_numbered_files(inputs, output, header_lines, sort_command,
                         verbose=False):
    """Sort and merge files `inputs`, add header and line numbers.

    The result consists of the header lines followed by the sorted
    contents of all input files, each line suffixed with its zero-based
    line number.

    :param inputs: Iterable of input files.
    :param output: Output file.
    :param header_lines: Iterable of header lines.
    :param sort_command: Command line for sorting input files.
    :param verbose: Print the sort command before running it.
    """
    # Materialise, so that we can tell whether there are any inputs.
    inputs = list(inputs)
    sort_temp = '%s.sorting' % output
    try:
        with open(sort_temp, 'w') as stream:
            for line in header_lines:
                stream.write(line)
                stream.write('\n')
        if inputs:
            # Without file arguments the sort command would read standard
            # input and wait forever, so only run it when there is input.
            execute_shell(
                "%s %s >>%s" % (
                    sort_command,
                    ' '.join(map(quote, inputs)),
                    quote(sort_temp)),
                verbose=verbose)
        suffix_line_numbers(sort_temp, output)
    finally:
        # Don't leave the intermediate file lying next to the output.
        if os.path.exists(sort_temp):
            os.remove(sort_temp)
|
suffix_line_numbers(sort_temp, output) |
|
|
|
|
|
def merge_source_labels(files, output, sort_command, verbose=False):
    """Merge the source labels files that go with `files` into `output`.

    Source labels files that do not exist are skipped.  The merged file
    starts with a fixed header of four labels.

    :param files: Paths whose source labels files should be merged.
    :param output: Path of the merged source labels file.
    :param sort_command: Command line for sorting the labels files.
    :param verbose: Print the sort command before running it.
    """
    candidate_paths = [compose_source_labels_path_for(path) for path in files]
    merge_numbered_files(
        list_existing(candidate_paths),
        output,
        ['GlueTop', 'GlueX', 'SSTART', 'SEND'],
        sort_command,
        verbose=verbose)
|
|
|
|
|
def compose_parts_of_speech_path_for(path):
    """Return the parts-of-speech file path that goes with file `path`.

    :param path: Path of a file.
    :return: `path` with `.partsOfSpeech` appended.
    """
    parts_template = '%s.partsOfSpeech'
    return parts_template % path
|
|
|
|
|
def merge_parts_of_speech(files, output, sort_command, verbose=False):
    """Merge the parts-of-speech files that go with `files` into `output`.

    Parts-of-speech files that do not exist are skipped.  The merged file
    starts with a fixed header of two labels.

    :param files: Paths whose parts-of-speech files should be merged.
    :param output: Path of the merged parts-of-speech file.
    :param sort_command: Command line for sorting the files.
    :param verbose: Print the sort command before running it.
    """
    candidate_paths = [
        compose_parts_of_speech_path_for(path) for path in files]
    merge_numbered_files(
        list_existing(candidate_paths),
        output,
        ['SSTART', 'SEND'],
        sort_command,
        verbose=verbose)
|
|
|
|
|
def main():
    """Command-line entry point.  Marshals and forwards to `score_parallel`.

    Parses and checks the command line, splits the extract file into
    chunks, scores the chunks in parallel, and merges the partial results
    into the requested output files.  All intermediate files live in a
    temporary directory, which is removed afterwards unless `--debug` was
    given.
    """
    args = parse_args()
    sanitize_args(args)
    set_temp_dir()

    if args.flexibility_score is None:
        extract_context_file = None
    else:
        extract_context_file = args.extract_file.replace(
            'extract.', 'extract.context.')

    if args.verbose:
        print("Started %s." % datetime.now())
        print("Using '%s' for gzip." % args.gzip_command)

    with tempdir(args.debug) as split_dir:
        extract_files = split_extract_files(
            split_dir, args.extract_file,
            extract_context_file=extract_context_file, jobs=args.jobs)

        scored_files = score_parallel(split_dir, extract_files, args)

        if args.verbose:
            sys.stderr.write("Finished score %s.\n" % datetime.now())

        merge_and_sort(
            [phrase_chunk for phrase_chunk, _ in scored_files], args.output,
            sort_command=args.sort_command, gzip_exe=args.gzip_command,
            verbose=args.verbose)
        merge_coc(extract_files, compose_coc_path_for(args.output))

        # `extract_files` holds (extract, context) tuples, but the label
        # and parts-of-speech mergers format one path per entry.  Give
        # them just the extract chunk paths.
        # NOTE(review): `score` may write these side files next to its
        # output (the half phrase tables) rather than next to the extract
        # chunks -- confirm which set of paths is the right base.
        extract_chunks = [extract_chunk for extract_chunk, _ in extract_files]

        if not args.inverse and args.labels_file is not None:
            if args.verbose:
                print("Merging source labels files.")
            merge_source_labels(
                extract_chunks, args.labels_file,
                sort_command=args.sort_command, verbose=args.verbose)

        if not args.inverse and args.parts_of_speech is not None:
            if args.verbose:
                print("Merging parts-of-speech files.")
            merge_parts_of_speech(
                extract_chunks, args.parts_of_speech,
                sort_command=args.sort_command, verbose=args.verbose)
|
|
|
|
|
if __name__ == '__main__':
    # Turn the two expected error types into a message on standard error
    # and a distinct exit status; anything else propagates as a traceback.
    try:
        main()
    except CommandLineError as error:
        sys.stderr.write("Command line error: %s\n" % error)
        sys.exit(2)
    except ProgramFailure as error:
        sys.stderr.write('%s\n' % error)
        sys.exit(1)
|
|