#!/usr/bin/env python
#
# Convenience script for running
# edu.stanford.nlp.trees.treebank.TreebankPreprocessor.
#
# This package automatically generates the Arabic and French
# parser training data from the respective source distributions.
#
# See the README for more details.
#
# author: Spence Green
##############################
import sys | |
from optparse import OptionParser | |
import os | |
import subprocess | |
from time import sleep | |
def run_treebank_pipeline(opts,conf_file):
    """Run TreebankPreprocessor in a JVM and echo its output line by line.

    Args:
        opts: Parsed command-line options. The attributes read here are
            ``jmem`` (used for both -Xmx and -Xms), ``verbose``, ``extra``
            and ``output_path``.
        conf_file: Configuration file path, passed as the last argument.

    Returns:
        The exit status of the Java process.
    """
    cmd_line = 'java -Xmx%s -Xms%s edu.stanford.nlp.trees.treebank.TreebankPreprocessor' % (opts.jmem,opts.jmem)

    if opts.verbose:
        cmd_line += ' -v'

    if opts.extra:
        cmd_line += ' ' + opts.extra

    if opts.output_path:
        cmd_line += ' -p ' + opts.output_path

    cmd_line += ' ' + conf_file

    p = call_command(cmd_line)

    # Read until EOF rather than polling for process exit: polling can drop
    # output that is still buffered in the pipe when the child terminates,
    # and spins if the child closes its stdout while it keeps running.
    while True:
        out_str = p.stdout.readline()
        if not out_str:
            break
        if not isinstance(out_str, str):
            # On Python 3 the pipe yields bytes; on Python 2 it is already str.
            out_str = out_str.decode('utf-8', 'replace')
        # Strip only a real trailing newline so that a final, unterminated
        # line does not lose its last character.
        if out_str.endswith('\n'):
            out_str = out_str[:-1]
        sys.stdout.write(out_str + '\n')

    p.stdout.close()
    return p.wait()
# TODO: a command given as a single string is still split on whitespace, so
# input or output paths containing spaces must be passed as a pre-split list.
def call_command(command):
    """Start *command* as a child process and return the Popen object.

    Args:
        command: Either a command-line string, which is split on runs of
            whitespace, or a list/tuple of arguments that is used as given
            (the only way to pass an argument that contains spaces).

    Returns:
        The running ``subprocess.Popen`` instance. Its stdin and stdout are
        pipes and its stderr is merged into stdout.
    """
    if isinstance(command, (list, tuple)):
        argv = list(command)
    else:
        # split() without a separator collapses repeated, leading and
        # trailing whitespace; split(' ') would turn each extra space into
        # an empty-string argument for the child process.
        argv = command.split()

    process = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return process
def main():
    """Parse the command line and run the preprocessor on one config file.

    Exactly one positional argument (the configuration file) is required;
    otherwise the usage text is printed and the script exits with status -1.
    """
    parser = OptionParser(usage='usage: %prog [opts] conf_file')
    parser.add_option(
        '-m', '--java-mem',
        dest='jmem',
        default='500m',
        help='Set JVM memory heap size (e.g. 500m)',
    )
    parser.add_option(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        default=False,
        help='Verbose mode',
    )
    parser.add_option(
        '-o', '--options',
        dest='extra',
        help='Pass options directly to TreebankPreprocessor',
    )
    parser.add_option(
        '-p', '--output-path',
        dest='output_path',
        help='Destination directory for the output',
    )

    opts, positional = parser.parse_args()

    # Guard clause: anything other than a single config file is a usage error.
    if len(positional) != 1:
        parser.print_help()
        sys.exit(-1)

    run_treebank_pipeline(opts, positional[0])
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()