File size: 1,980 Bytes
b028d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
#
# Convenience script for running
# edu.stanford.nlp.trees.treebank.TreebankPreprocessor.
#
# This package automatically generates the Arabic and French
# parser training data from the respective source distributions.
#
# See the README for more details.
#
# author:  Spence Green
##############################

import os
import shlex
import subprocess
import sys
from optparse import OptionParser
from time import sleep

def run_treebank_pipeline(opts,conf_file):
    """Run TreebankPreprocessor on ``conf_file`` and echo its output.

    Builds the ``java`` command line from the parsed options, launches it
    through ``call_command`` and prints every line the child process writes
    until it closes its output stream.

    Args:
        opts: Parsed options object providing ``jmem`` (used for both
            ``-Xmx`` and ``-Xms``), ``verbose``, ``extra`` and
            ``output_path``.
        conf_file: Configuration file name, appended as the last argument.

    Returns:
        The exit status of the java process.
    """
    cmd_line = 'java -Xmx%s -Xms%s edu.stanford.nlp.trees.treebank.TreebankPreprocessor' % (opts.jmem, opts.jmem)

    if opts.verbose:
        cmd_line = cmd_line + ' -v'

    if opts.extra:
        cmd_line = cmd_line + ' ' + opts.extra

    if opts.output_path:
        cmd_line = cmd_line + ' -p ' + opts.output_path

    cmd_line = cmd_line + ' ' + conf_file

    p = call_command(cmd_line)

    # Read until end-of-file instead of polling for process exit: the child
    # can terminate while output is still buffered in the pipe, and a
    # poll()-driven loop would silently drop that tail.
    while True:
        raw_line = p.stdout.readline()
        if not raw_line:
            break
        # A binary pipe yields bytes on Python 3; on Python 2 it yields str,
        # which can be printed unchanged.
        if isinstance(raw_line, str):
            line = raw_line
        else:
            # NOTE(review): assumes the JVM emits UTF-8 - confirm.
            line = raw_line.decode('utf-8', 'replace')
        # Strip only the line terminator so that a final line without a
        # trailing newline does not lose its last character.
        print(line.rstrip('\r\n'))

    p.stdout.close()
    # Reap the child so it does not linger as a zombie, and hand its exit
    # status to the caller.
    return p.wait()

# Arguments that contain spaces (e.g. input or output paths) must be quoted
# inside the command string; the string is tokenized here, not run by a shell.
def call_command(command):
    """Start ``command`` as a child process and return its Popen handle.

    Args:
        command: The full command line as a single string. Quoted segments
            are kept together as one argument, and runs of whitespace
            between arguments do not produce empty arguments.

    Returns:
        A ``subprocess.Popen`` object whose stdin and stdout are pipes and
        whose stderr is merged into stdout.

    Raises:
        ValueError: On POSIX, if ``command`` contains an unterminated quote.
        OSError: If the executable cannot be started.
    """
    if os.name == 'nt':
        # On Windows the command line reaches the new process as a single
        # string anyway; passing it through unchanged keeps backslashes in
        # paths intact and leaves quote handling to the platform.
        args = command
    else:
        args = shlex.split(command)
    return subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

def main():
    """Parse the command line and run the pipeline on one configuration file.

    Exactly one positional argument (the configuration file) is required;
    with any other count the help text is printed and the script exits
    with status -1.
    """
    cli = OptionParser(usage='usage: %prog [opts] conf_file')

    # Registration order is kept because it determines the order in --help.
    option_table = (
        (('-m', '--java-mem'),
         dict(dest='jmem', default='500m',
              help='Set JVM memory heap size (e.g. 500m)')),
        (('-v', '--verbose'),
         dict(dest='verbose', action='store_true', default=False,
              help='Verbose mode')),
        (('-o', '--options'),
         dict(dest='extra',
              help='Pass options directly to TreebankPreprocessor')),
        (('-p', '--output-path'),
         dict(dest='output_path',
              help='Destination directory for the output')),
    )
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    opts, positional = cli.parse_args()

    # Unpacking succeeds only when there is exactly one positional argument.
    try:
        (conf_file,) = positional
    except ValueError:
        cli.print_help()
        sys.exit(-1)

    run_treebank_pipeline(opts, conf_file)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()