File size: 27,079 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 |
#! /usr/bin/env python
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#
# Script contributed by Precision Translation Tools.
"""Run Moses `score` jobs in parallel.
This script is a replacement for `score-parallel.perl`. The two are similar,
but there are differences in usage. In addition, this script can be called
directly from Python code without the need to run it as a separate process.
"""
from __future__ import (
absolute_import,
print_function,
unicode_literals,
)
__metaclass__ = type
from argparse import ArgumentParser
from contextlib import contextmanager
from datetime import datetime
import errno
import gzip
from multiprocessing import Pool
import os
import os.path
import pipes
from shutil import rmtree
from subprocess import check_call
import sys
import tempfile
def get_unicode_type():
    """Pick the text (Unicode) string type for the running interpreter.

    Python 2 names it `unicode` (its `str` holds bytes); Python 3 names it
    `str` (bytes live in `bytes`).
    """
    running_python3 = sys.version_info.major > 2
    if running_python3:
        return str
    # Only reached on Python 2, where the `unicode` builtin exists.
    return unicode


UNICODE_TYPE = get_unicode_type()
class CommandLineError(Exception):
    """The command line given by the user is not valid."""
class ProgramFailure(Exception):
    """An expected failure (not a bug) that is shown tidily to the user."""
def parse_args(argv=None):
    """Parse command line arguments, return as `Namespace`.

    :param argv: Optional list of argument strings to parse.  When `None`
        (the default), `sys.argv[1:]` is parsed, as before.  Passing a
        list lets Python code drive this script without touching
        `sys.argv`.
    :return: The `argparse.Namespace` holding the parsed options.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        '--extract-file', '-e', metavar='PATH', required=True,
        help=(
            "Path to input file: extract file (e.g. 'extract.sorted.gz' or "
            "'extract.inv.sorted.gz').  Required."))
    parser.add_argument(
        '--lex-file', '-l', metavar='PATH', required=True,
        help=(
            "Path to input file: lex file (e.g. 'lex.f2e' or 'lex.e2f').  "
            "Required."))
    parser.add_argument(
        '--output', '-o', metavar='PATH', required=True,
        help=(
            "Write phrase table to file PATH (e.g. 'phrase-table.half.f2e' "
            "or 'phrase-table.half.e2f').  Required."))
    parser.add_argument(
        '--inverse', '-i', action='store_true',
        help="Inverse scoring.  Defaults to direct scoring.")
    parser.add_argument(
        '--labels-file', '-L', metavar='PATH',
        help="Also write source labels to file PATH.")
    parser.add_argument(
        '--parts-of-speech', '-p', metavar='PATH',
        help="Also write parts-of-speech file to PATH.")
    parser.add_argument(
        '--flexibility-score', '-F', metavar='PATH',
        help="Path to the 'flexibility_score.py' script.  Defaults to none.")
    parser.add_argument(
        '--hierarchical', '-H', action='store_true',
        help="Process hierarchical rules.")
    parser.add_argument(
        '--args', '-a', metavar='ARGUMENTS',
        help="Additional arguments for `score` and `flexibility_score`.")
    parser.add_argument(
        '--sort', '-s', action='store_true',
        help="Sort output file.")
    parser.add_argument(
        '--jobs', '-j', metavar='N', type=int, default=1,
        help="Run up to N jobs in parallel.  Defaults to %(default)s.")
    parser.add_argument(
        '--score-exe', '-x', metavar='PROGRAM',
        help="Name of, or path to, the 'score' executable.")
    parser.add_argument(
        '--sort-command', '-S', metavar='COMMAND-LINE',
        help=(
            "Command line for sorting text files to standard output.  "
            "Must support operation as a pipe, as well as input files named "
            "as command-line arguments."))
    parser.add_argument(
        '--gzip-command', '-z', metavar='PROGRAM',
        help="Path to a gzip or pigz executable.")
    parser.add_argument(
        '--verbose', '-v', action='store_true',
        help="Print what's going on.")
    parser.add_argument(
        '--debug', '-d', action='store_true',
        help="Don't delete temporary directories when done.")
    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
def normalize_path(optional_path=None):
    """Tidy up a filesystem path; pass `None` straight through.

    The path is normalised with `os.path.normpath` (dropping redundant
    separators, "." components and needless ".." detours), after which
    both "/" and "\\" are rewritten to the native path separator.

    An absolute path stays absolute, a relative path stays relative, and
    `None` stays `None`.
    """
    if optional_path is None:
        return None
    cleaned = os.path.normpath(optional_path)
    for separator in ('/', '\\'):
        cleaned = cleaned.replace(separator, os.path.sep)
    return cleaned
def quote(path):
    """Quote and escape a filename for use in a shell command.

    On POSIX systems this uses the standard shell quoting function.  The
    Windows implementation is very limited and will break on anything
    more advanced than a space.

    :param path: Filename (or other single word) to protect.
    :return: `path`, quoted so that a shell treats it as one word.
    """
    if os.name == 'posix':
        # `pipes.quote` was never documented, and the `pipes` module was
        # removed in Python 3.13.  Prefer the supported `shlex.quote`, and
        # fall back to `pipes` only where `shlex.quote` does not exist
        # (Python 2).
        try:
            from shlex import quote as shell_quote
        except ImportError:
            from pipes import quote as shell_quote
        return shell_quote(path)
    else:
        # TODO: Improve escaping for Windows.
        return '"%s"' % path
def sanitize_args(args):
    """Validate `args`, fill in computed defaults, and normalise paths.

    Modifies `args` in place.

    :raise CommandLineError: If the job count is below 1, or no sort or
        gzip command could be determined.
    """
    if args.jobs < 1:
        raise CommandLineError("Number of parallel jobs must be 1 or more.")

    if args.sort_command is None:
        args.sort_command = find_first_executable(
            ['neandersort', 'gsort', 'sort'])
    if args.sort_command is None:
        raise CommandLineError(
            "No 'sort' command is available.  "
            "Choose one using the --sort-command option.")

    if args.gzip_command is None:
        args.gzip_command = find_first_executable(['pigz', 'gzip'])
    if args.gzip_command is None:
        raise CommandLineError(
            "No 'gzip' or 'pigz' command is available.  "
            "Choose one using the --gzip-command option.")

    if args.score_exe is None:
        # Search for "score" two directory levels up from this script,
        # in a couple of its subdirectories, and finally in the PATH.
        script_path = os.path.abspath(__file__)
        moses_dir = os.path.dirname(os.path.dirname(script_path))
        search_dirs = [
            moses_dir,
            os.path.join(moses_dir, 'phrase-extract'),
            os.path.join(moses_dir, 'binaries'),
        ]
        args.score_exe = find_first_executable(['score'], search_dirs)

    path_options = (
        'extract_file',
        'lex_file',
        'output',
        'labels_file',
        'parts_of_speech',
        'flexibility_score',
        'score_exe',
    )
    for option in path_options:
        setattr(args, option, normalize_path(getattr(args, option)))
def add_exe_suffix(program):
    """Give the on-disk file name for the executable called `program`.

    Windows executables carry a `.exe` suffix, so it is appended there.
    Everywhere else the name is returned as-is.
    """
    if os.name != 'nt':
        # POSIX or similar: executables have no special suffix.
        return program
    return program + '.exe'
def find_executable(exe, extra_path=None):
    """Return full path to an executable of the given name, or `None`.

    If the given name is a qualified path to an executable, it will be
    returned unchanged.  A qualified path where no executable is found
    results in a `CommandLineError`.

    :param exe: Program name, or a path containing a path separator.
    :param extra_path: Optional sequence of directories to search before
        the directories in the `PATH` environment variable.
    :return: Path to the first matching executable file, or `None` if the
        unqualified name was not found anywhere.
    :raise CommandLineError: If `exe` includes a path but does not name an
        executable file.
    """
    if os.path.sep in exe:
        # The executable name includes a path.  Only one place it can be.
        if not os.path.isfile(exe) or not os.access(exe, os.X_OK):
            raise CommandLineError("Not an executable: '%s'." % exe)
        return exe

    search_dirs = list(extra_path or [])
    # PATH may be unset or empty; splitting `None` would crash.
    env_path = os.environ.get('PATH')
    if env_path:
        search_dirs += env_path.split(os.pathsep)

    for directory in search_dirs:
        full_path = os.path.join(directory, exe)
        # Directories also pass the X_OK test, so insist on a regular file
        # (same check as for qualified paths above).
        if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
            return full_path
    return None
def find_first_executable(candidates, extra_path=None):
    """Locate the first of `candidates` that exists as an executable.

    Candidates are tried in order; the platform's executable suffix is
    added to each before searching.

    :raise ProgramFailure: If none of `candidates` was found.
    """
    # Lazy, so the search stops at the first hit.
    located = (
        find_executable(add_exe_suffix(name), extra_path)
        for name in candidates)
    first_hit = next((path for path in located if path is not None), None)
    if first_hit is None:
        raise ProgramFailure(
            "Could not find any of these executables in path: %s."
            % ', '.join(candidates))
    return first_hit
def execute_shell(command, verbose=False):
    """Hand the `command` string to the shell and wait for it to finish.

    The child inherits this process's environment.  On POSIX systems,
    `LC_ALL` is forced to `C` so that results (sort order in particular)
    are predictable.

    The command goes through a full shell, with pipes, substitution and so
    on, so quote or escape arguments where needed.  A non-zero exit status
    raises `subprocess.CalledProcessError`.
    """
    assert isinstance(command, UNICODE_TYPE), (
        "Wrong argument for execute_shell.")
    if verbose:
        print("Executing: %s" % command)
    environment = dict(os.environ)
    if os.name == 'posix':
        environment.update(LC_ALL='C')
    check_call(command, shell=True, env=environment)
@contextmanager
def tempdir(keep=False):
    """Context manager: temporary directory.

    Creates a fresh temporary directory and yields its path.

    :param keep: If true, leave the directory in place on exit (useful
        for debugging).  Otherwise it is deleted with all its contents.
    """
    directory = tempfile.mkdtemp()
    try:
        yield directory
    finally:
        # Clean up even when the `with` body raised; otherwise a failed
        # run would leave its temporary files behind.
        if not keep:
            rmtree(directory)
def make_dirs(path):
    """Equivalent to `mkdir -p -- path`.

    Creates `path` and any missing parent directories.  It is not an
    error if the directory already exists.

    :raise OSError: If creation fails, including when `path` already
        exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as error:
        # EEXIST is also reported when a regular file is in the way; only
        # an existing *directory* counts as success.
        if error.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def open_file(path, mode='r'):
    """Open `path`, going through gzip when its name ends in `.gz`.

    `mode` is passed on unchanged to `gzip.open` or the builtin `open`.
    """
    opener = gzip.open if path.endswith('.gz') else open
    return opener(path, mode)
def count_lines(filename):
    """Return how many lines `filename` holds (gzip files are supported)."""
    with open_file(filename) as stream:
        return sum(1 for _ in stream)
def set_temp_dir():
    """Point `tempfile` at `$MOSES_TEMP_DIR`, when that variable is set.

    The directory is created first if it does not exist yet.  Without the
    variable, this does nothing.
    """
    configured = os.environ.get('MOSES_TEMP_DIR')
    if configured is None:
        return
    make_dirs(configured)
    tempfile.tempdir = configured
def strip_newline(line):
    """Remove trailing carriage return and/or line feed, if present.

    Works on both text and binary strings; the result has the same type as
    the argument.  At most one line feed, followed by at most one carriage
    return, is removed from the end.

    :param line: A line of text (`str`/`unicode`) or `bytes`.
    :return: `line` without its line terminator.
    """
    # Lines read from gzip streams are bytes, and on Python 3
    # bytes.endswith() rejects a text suffix, so match the markers to the
    # argument's type.
    if isinstance(line, bytes):
        line_feed, carriage_return = b'\n', b'\r'
    else:
        line_feed, carriage_return = '\n', '\r'
    if line.endswith(line_feed):
        line = line[:-1]
    if line.endswith(carriage_return):
        line = line[:-1]
    return line
def open_chunk_file(split_dir, chunk_number):
    """Open, for writing, the file for one chunk of the extract file.

    The file is `extract.<chunk_number>.gz` inside `split_dir`.
    """
    chunk_name = 'extract.%d.gz' % chunk_number
    chunk_path = os.path.join(split_dir, chunk_name)
    return open_file(chunk_path, 'w')
def name_context_chunk_file(split_dir, chunk_number):
    """Build the path of one chunk of the extract context file.

    The file is `extract.context.<chunk_number>.gz` inside `split_dir`.
    """
    chunk_name = 'extract.context.%d.gz' % chunk_number
    return os.path.join(split_dir, chunk_name)
def extract_source_phrase(line):
    """Extract the source phrase from an extract-file line.

    The source phrase is everything before the first `|||` separator.  If
    the line has no separator, the whole line is returned.

    :param line: One line of an extract file, as text or as `bytes`.
    :return: The source phrase, of the same type as `line`.
    """
    # On Python 3 a text line cannot be split on a bytes separator (and
    # vice versa), so pick the separator that matches the line's type.
    separator = b'|||' if isinstance(line, bytes) else '|||'
    return line.split(separator, 1)[0]
def cut_context_file(last_source_phrase, chunk_file, last_line,
                     context_stream):
    """Write one chunk of extract context file into its own file.

    Lines are handled as text and written as UTF-8, the same encoding
    `split_extract_files` uses for the extract file.

    :param last_source_phrase: Last source phrase that should be in the
        chunk.  Stop processing after this source phrase.
    :param chunk_file: Path to the extract context file for this chunk.
        It is written gzip-compressed.
    :param last_line: Previously read line that may still need writing,
        or `None`.
    :param context_stream: Extract context file, opened for reading.
    :return: Last line read from `context_stream`.  This line will still
        need processing.  `None` if the stream ran out first.
    """
    def as_text(raw):
        """Decode `raw` as UTF-8 if it is bytes; pass text through."""
        if isinstance(raw, bytes):
            return raw.decode('utf-8')
        return raw

    if last_source_phrase is not None:
        # Compare like with like: stream lines are decoded to text below.
        last_source_phrase = as_text(last_source_phrase)

    # TODO: Use open_file.
    # The gzip stream is binary, so every line is encoded explicitly.
    with gzip.open(chunk_file, 'wb') as chunk:
        if last_line is not None:
            chunk.write(('%s\n' % as_text(last_line)).encode('utf-8'))
        # Are we processing our last source phrase yet?
        on_last_source_phrase = False
        # Write all lines in context file until we meet last source phrase
        # in extract file.
        for line in context_stream:
            # Reading from a gzip file returns bytes *including the
            # newline*.  Either way, we want to ignore carriage returns as
            # well.
            line = strip_newline(as_text(line))
            source_phrase = extract_source_phrase(line)
            if on_last_source_phrase and source_phrase != last_source_phrase:
                # First new source phrase after our last one.  We're done.
                return line
            # Still adding lines to our chunk.
            chunk.write(('%s\n' % line).encode('utf-8'))
            if source_phrase == last_source_phrase:
                # We're on our last source phrase now.
                on_last_source_phrase = True
    # Context stream exhausted: nothing left over for the next chunk.
    return None
def split_extract_files(split_dir, extract_file, extract_context_file=None,
                        jobs=1):
    """Split extract file into chunks, so we can process them in parallel.

    Chunks are only cut where the source phrase changes, so all lines for
    one source phrase end up in the same chunk.

    :param split_dir: A temporary directory where this function can write
        temporary files.  The caller must ensure that this directory will
        be cleaned up after it's done with the files.
    :param extract_file: Path to the extract file (may be gzipped).
    :param extract_context_file: Optional path to the extract context
        file, which is cut at the same source phrases.
    :param jobs: Number of chunks to aim for.
    :return: An iterable of tuples.  Each tuple holds a partial extract
        file, and the corresponding context file.  The files may be in
        `split_dir`, or there may just be the original extract file.
    """
    if jobs == 1:
        # No splitting needed.  Read the original file(s).
        return [(extract_file, extract_context_file)]

    # Otherwise: split files.
    files = []
    num_lines = count_lines(extract_file)
    # Ceiling division.  Must be "//": plain "/" yields a float on
    # Python 3.
    chunk_size = (num_lines + jobs - 1) // jobs
    line_count = 0
    chunk_number = 0
    prev_source_phrase = None
    last_line_context = None
    chunk_context_file = None
    context_stream = None

    extract_stream = open_file(extract_file)
    try:
        if extract_context_file is not None:
            context_stream = open_file(extract_context_file)
        chunk_file = open_chunk_file(split_dir, chunk_number)
        try:
            for line in extract_stream:
                line_count += 1
                # Gzip streams yield bytes; plain files may yield text.
                if isinstance(line, bytes):
                    line = line.decode('utf-8')
                line = strip_newline(line)
                if line_count >= chunk_size:
                    # At or over chunk size.  Cut off at next source
                    # phrase change.
                    source_phrase = extract_source_phrase(line)
                    if prev_source_phrase is None:
                        # Start looking for a different source phrase.
                        prev_source_phrase = source_phrase
                    elif source_phrase != prev_source_phrase:
                        # Hit first new source phrase after chunk limit.
                        # Cut new file(s).
                        chunk_file.close()
                        if extract_context_file is not None:
                            chunk_context_file = name_context_chunk_file(
                                split_dir, chunk_number)
                            last_line_context = cut_context_file(
                                prev_source_phrase, chunk_context_file,
                                last_line_context, context_stream)
                        files.append((chunk_file.name, chunk_context_file))
                        # Start on new chunk.
                        prev_source_phrase = None
                        line_count = 0
                        chunk_number += 1
                        chunk_file = open_chunk_file(split_dir, chunk_number)
                    # Otherwise: can't cut yet, same source phrase.
                chunk_file.write(('%s\n' % line).encode('utf-8'))
        finally:
            chunk_file.close()

        # Finish the last chunk.
        if extract_context_file is not None:
            chunk_context_file = name_context_chunk_file(
                split_dir, chunk_number)
            # Pass the chunk's *path* here, not its number.
            cut_context_file(
                prev_source_phrase, chunk_context_file, last_line_context,
                context_stream)
        files.append((chunk_file.name, chunk_context_file))
    finally:
        extract_stream.close()
        if context_stream is not None:
            context_stream.close()
    return files
def compose_score_command(extract_file, context_file, half_file,
                          flex_half_file, args):
    """Compose command line text to run one instance of `score`.

    When `context_file` is given, the command continues (after `&&`) with
    a pipeline that decompresses `half_file`, feeds it to the flexibility
    scoring script, and compresses the result into `flex_half_file`.

    :param extract_file: One chunk of extract file.
    :param context_file: If doing flexibility scoring, one chunk of
        extract context file.  Otherwise, None.
    :param half_file: Path handed to `score` as its third argument; the
        flexibility pipeline reads it back.
    :param flex_half_file: Path for the compressed flexibility-scoring
        output.  Only used when `context_file` is given.
    :param args: Arguments namespace.
    :return: The shell command line, as a single string.
    """
    # Quote every path, consistently with the rest of the command line, so
    # that names with spaces survive the shell.
    command = [
        quote(args.score_exe),
        quote(extract_file),
        quote(args.lex_file),
        quote(half_file),
    ]
    # build_score_args() already appends args.args.  Adding it here too
    # would pass the user's extra arguments to `score` twice.
    other_args = build_score_args(args)
    if other_args != '':
        command.append(other_args)
    if context_file is not None:
        command += [
            '&&',
            # half_file is named *.gz, so decompress it with the configured
            # gzip program.  bzcat cannot read gzip data.
            quote(args.gzip_command),
            '-c',
            '-d',
            quote(half_file),
            '|',
            quote(args.flexibility_score),
            quote(context_file),
        ]
        if args.inverse:
            command.append('--Inverse')
        if args.hierarchical:
            command.append('--Hierarchical')
        command += [
            '|',
            quote(args.gzip_command),
            '-c',
            '>%s' % quote(flex_half_file),
        ]
    return ' '.join(command)
def score_parallel(split_dir, file_pairs, args):
    """Run the `score` command in parallel.

    :param split_dir: Temporary directory where we can create split files.
    :param file_pairs: Sequence of tuples for the input files, one tuple
        per chunk of the work.  Each tuple consists of a partial extract
        file, and optionally a partial extract context file.
    :param args: Arguments namespace.
    :return: A list of tuples.  Each tuple contains two file paths.  The
        first is for a partial half-phrase-table file.  The second is for
        the corresponding partial flex file, if a context file is given;
        or `None` otherwise.
    :raise Exception: Whatever a worker's `execute_shell` call raised
        (e.g. `subprocess.CalledProcessError` when a command fails).
    """
    partial_files = []
    pending = []
    # Pool of worker processes for executing the partial "score"
    # invocations concurrently.
    pool = Pool(args.jobs)
    try:
        for chunk_num, file_pair in enumerate(file_pairs):
            half_file = os.path.join(
                split_dir, 'phrase-table.half.%06d.gz' % chunk_num)
            extract_file, context_file = file_pair
            if context_file is None:
                flex_half_file = None
            else:
                flex_half_file = os.path.join(
                    split_dir, 'phrase-table.half.%06d.flex.gz' % chunk_num)
            # Pickling of arguments for the pool is awkward on Windows, so
            # keep them simple.  Compose the command line in the parent
            # process, then hand them to worker processes which execute
            # them.
            command_line = compose_score_command(
                extract_file, context_file, half_file, flex_half_file, args)
            pending.append(pool.apply_async(
                execute_shell, (command_line, ), {'verbose': args.verbose}))
            partial_files.append((half_file, flex_half_file))
        pool.close()
        # apply_async() swallows worker exceptions until the result is
        # fetched.  Fetch every result so that a failed job aborts the run
        # instead of silently producing an incomplete phrase table.
        for result in pending:
            result.get()
    except BaseException:
        pool.terminate()
        raise
    finally:
        pool.join()
    return partial_files
def merge_and_sort(files, output, sort_command=None, gzip_exe=None,
                   verbose=False):
    """Merge partial files.

    Any existing `output` file is replaced.

    :param files: List of partial half-phrase-table files.
    :param output: Path for resulting combined phrase-table file.
    :param sort_command: Command line of the sort program.  It is inserted
        into the shell pipeline unquoted, so it may include options.
    :param gzip_exe: Path to the gzip (or compatible) program.
    :param verbose: Print the shell command before running it.
    """
    # TODO: The Perl code mentioned "sort" and "flexibility_score" here.
    # What do we do with those?

    # Sort whether we're asked to or not, as a way of combining the input
    # files.
    if sort_command == 'neandersort':
        # Neandersort transparently decompresses input and compresses
        # output.
        check_call([
            'neandersort',
            '-o', output,
        ] + list(files))
    else:
        # Truncate with ">" rather than appending with ">>": nothing has
        # been written to `output` yet, and appending would tack the new
        # table onto leftovers from an earlier run.  This also matches the
        # overwriting behaviour of the neandersort branch.
        command = (
            "%(gzip)s -c -d %(files)s | "
            "%(sort)s | "
            "%(gzip)s -c >%(output)s"
            % {
                'gzip': quote(gzip_exe),
                'sort': sort_command,
                'files': ' '.join(map(quote, files)),
                'output': quote(output),
            })
        execute_shell(command, verbose=verbose)
def build_score_args(args):
    """Assemble the option string to pass to the `score` program.

    Options are derived from the `labels_file`, `parts_of_speech`,
    `inverse` and `args` attributes of the arguments namespace, and joined
    with single spaces.
    """
    options = []
    if args.labels_file:
        options.extend([
            '--SourceLabels',
            '--SourceLabelCountsLHS',
            '--SourceLabelSet',
        ])
    switches = (
        (args.parts_of_speech, '--PartsOfSpeech'),
        (args.inverse, '--Inverse'),
    )
    options.extend(flag for enabled, flag in switches if enabled)
    if args.args is not None:
        options.append(args.args)
    return ' '.join(options)
def list_existing(paths):
    """Return, in the same order, those of the given files which exist.

    :param paths: Iterable of filesystem paths.
    :return: A list (not a lazy iterator) of the paths that exist.
    """
    # filter() returns a one-shot iterator on Python 3; build a real list
    # so callers can iterate it more than once or take its length.
    return [path for path in paths if os.path.exists(path)]
def compose_coc_path_for(path):
    """Return the COC-file path that goes with `path` (adds `.coc`)."""
    coc_path = '%s.coc' % path
    return coc_path
def read_cocs(path):
    """Load the COC file at `path` as a tuple of ints, one per line."""
    counts = []
    with open(path) as lines:
        for line in lines:
            counts.append(int(line.rstrip('\r\n')))
    return tuple(counts)
def add_cocs(original, additional):
    """Sum two COC tuples position by position.

    Either argument (but not both) may be `None`, in which case the other
    one is returned.  When the tuples differ in length, the surplus
    entries of the longer one are carried over unchanged.
    """
    assert not (original is None and additional is None), "No COCs to add!"
    if original is None:
        return additional
    if additional is None:
        return original
    shared = min(len(original), len(additional))
    summed = tuple(
        original[index] + additional[index] for index in range(shared))
    # At most one of these two tails is non-empty.
    return summed + tuple(original[shared:]) + tuple(additional[shared:])
def merge_coc(files, output):
    """Combine the COC files belonging to the given partial files.

    A COC file holds one integer per line.  The result has the same
    format, where line N is the sum of line N across all input files.

    :param files: Non-empty sequence of 2-tuples; the COC file is looked
        up next to the first element of each tuple.
    :param output: Path of the merged COC file to write.

    If the first partial file has no COC file, nothing is written.
    """
    assert len(files) > 0, "No partial files - no work to do."
    coc_paths = [compose_coc_path_for(first) for first, _ in files]
    if not os.path.exists(coc_paths[0]):
        # Nothing to merge.
        return

    # TODO: Shouldn't we just fail if any of these files is missing?
    totals = None
    for coc_path in list_existing(coc_paths):
        totals = add_cocs(totals, read_cocs(coc_path))

    # Write to output file.
    with open(output, 'w') as output_stream:
        output_stream.writelines('%d\n' % entry for entry in totals)
def suffix_line_numbers(infile, outfile):
    """Rewrite `infile` to `outfile`; suffix line number to each line.

    The line number is zero-based, and separated from the rest of the line
    by a single space.

    The output is first written to a temporary file next to `outfile`
    (named `<outfile>.numbering`) and then renamed into place, so that
    `outfile` never holds a half-written result.

    :param infile: Path of the text file to read.
    :param outfile: Path of the numbered file to produce.
    """
    temp_file = '%s.numbering' % outfile
    # Write to the temporary file (not to `outfile`), since that is the
    # file renamed below.
    with open(infile, 'r') as instream, open(temp_file, 'w') as outstream:
        for line_no, line in enumerate(instream):
            # Drop the line terminator first; otherwise the number would
            # land on a line of its own.
            outstream.write('%s %d\n' % (line.rstrip('\r\n'), line_no))
    os.rename(temp_file, outfile)
def compose_source_labels_path_for(path):
    """Return the source labels file path for `path`.

    This is `path` with `.syntaxLabels.src` appended.
    """
    labels_path = '%s.syntaxLabels.src' % path
    return labels_path
def merge_numbered_files(inputs, output, header_lines, sort_command,
                         verbose=False):
    """Sort and merge files `inputs`, add header and line numbers.

    The header lines come first, followed by the sorted contents of the
    input files.  Every line then gets its zero-based line number
    appended.

    :param inputs: Iterable of input files.  May be empty, in which case
        the output consists of just the numbered header.
    :param output: Output file.
    :param header_lines: Iterable of header lines.
    :param sort_command: Command line for sorting input files.
    :param verbose: Print the shell command before running it.
    """
    # Materialise: `inputs` may be a one-shot iterator, and we need to
    # know whether it is empty.
    inputs = list(inputs)
    sort_temp = '%s.sorting' % output
    try:
        with open(sort_temp, 'w') as stream:
            for line in header_lines:
                stream.write(line)
                stream.write('\n')
        if inputs:
            # Without file arguments the sort command would wait for
            # input on stdin, so only run it when there is something to
            # sort.
            execute_shell(
                "%s %s >>%s" % (
                    sort_command,
                    ' '.join(map(quote, inputs)),
                    quote(sort_temp)),
                verbose=verbose)
        suffix_line_numbers(sort_temp, output)
    finally:
        # The intermediate file lives next to `output`, not in the
        # temporary directory, so nobody else will clean it up.
        if os.path.exists(sort_temp):
            os.remove(sort_temp)
def merge_source_labels(files, output, sort_command, verbose=False):
    """Merge the source labels files that belong to `files` into `output`.

    Only label files that actually exist are merged.  The result starts
    with a fixed four-line header.
    """
    header = ['GlueTop', 'GlueX', 'SSTART', 'SEND']
    # TODO: Shouldn't we just fail if any of these files is missing?
    labels_files = list_existing(
        compose_source_labels_path_for(path) for path in files)
    merge_numbered_files(
        labels_files, output, header, sort_command, verbose=verbose)
def compose_parts_of_speech_path_for(path):
    """Return the parts-of-speech file path for `path`.

    This is `path` with `.partsOfSpeech` appended.
    """
    pos_path = '%s.partsOfSpeech' % path
    return pos_path
def merge_parts_of_speech(files, output, sort_command, verbose=False):
    """Merge the parts-of-speech files that belong to `files` into `output`.

    Only parts-of-speech files that actually exist are merged.  The result
    starts with a fixed two-line header.
    """
    header = ['SSTART', 'SEND']
    # TODO: Shouldn't we just fail if any of these files is missing?
    parts_files = list_existing(
        compose_parts_of_speech_path_for(path) for path in files)
    merge_numbered_files(
        parts_files, output, header, sort_command, verbose=verbose)
def main():
    """Command-line entry point.  Marshals and forwards to `score_parallel`.

    Splits the extract file into chunks, scores the chunks in parallel,
    and merges the partial results (phrase table, COC file, and optionally
    the source labels and parts-of-speech files).
    """
    args = parse_args()
    sanitize_args(args)
    set_temp_dir()

    if args.flexibility_score is None:
        extract_context_file = None
    else:
        extract_context_file = args.extract_file.replace(
            'extract.', 'extract.context.')

    if args.verbose:
        print("Started %s." % datetime.now())
        print("Using '%s' for gzip." % args.gzip_command)

    with tempdir(args.debug) as split_dir:
        extract_files = split_extract_files(
            split_dir, args.extract_file,
            extract_context_file=extract_context_file, jobs=args.jobs)

        scored_files = score_parallel(split_dir, extract_files, args)

        if args.verbose:
            sys.stderr.write("Finished score %s.\n" % datetime.now())

        # Plain paths of the partial phrase-table files.  The merge
        # helpers for labels and parts of speech format each item with a
        # single "%s", so they need paths, not (file, context) tuples.
        half_files = [phrase_chunk for phrase_chunk, _ in scored_files]

        # TODO: Pass on "sort" and "flexibility-score" arguments?
        merge_and_sort(
            half_files, args.output,
            sort_command=args.sort_command, gzip_exe=args.gzip_command,
            verbose=args.verbose)

        # NOTE(review): side files (.coc, .syntaxLabels.src,
        # .partsOfSpeech) are looked up next to the scored half
        # phrase-table chunks, on the understanding that `score` names
        # them after its output file -- confirm against `score`.
        merge_coc(scored_files, compose_coc_path_for(args.output))

        if not args.inverse and args.labels_file is not None:
            if args.verbose:
                print("Merging source labels files.")
            merge_source_labels(
                half_files, args.labels_file,
                sort_command=args.sort_command, verbose=args.verbose)

        if not args.inverse and args.parts_of_speech is not None:
            if args.verbose:
                print("Merging parts-of-speech files.")
            merge_parts_of_speech(
                half_files, args.parts_of_speech,
                sort_command=args.sort_command, verbose=args.verbose)
if __name__ == '__main__':
    # Exit status: 1 for a reported program failure, 2 for a bad command
    # line.  Anything else propagates as a normal traceback.
    exit_status = 0
    try:
        main()
    except CommandLineError as error:
        sys.stderr.write("Command line error: %s\n" % error)
        exit_status = 2
    except ProgramFailure as error:
        sys.stderr.write('%s\n' % error)
        exit_status = 1
    if exit_status != 0:
        sys.exit(exit_status)
|