NMTKD/translation/tools/mosesdecoder/scripts/training/wrappers/make-factor-en-pos.mxpost.perl
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
use FindBin qw($RealBin);
use Getopt::Long "GetOptions";

# Produces a POS-tag factor file for English text using the MXPOST tagger.
#
# Usage: make-pos-en.mxpost.perl -mxpost INSTALL_DIR IN_FILE OUT_FILE
#
#   -mxpost INSTALL_DIR  directory containing the `mxpost` executable and
#                        the `tagger.project` model
#   IN_FILE              input text, one sentence per line
#   OUT_FILE             output file; for every input line, one line holding
#                        the tag of each token, each followed by a space
#
# IN_FILE is piped through deescape-special-chars.perl, an ASCII filter and
# mxpost; the tags are written through escape-special-chars.perl to OUT_FILE.
# The script exits with status 1 on bad arguments or when one of the two
# pipelines cannot be started or finishes unsuccessfully.

# Tag written for a token from which no tag at all can be recovered.
# Emitting a placeholder keeps one output token per tagger token.
my $FALLBACK_TAG = "UNK";

my ($IN,$OUT,$MXPOST);
if (!GetOptions('mxpost=s' => \$MXPOST) ||
    !($IN = shift @ARGV) ||
    !($OUT = shift @ARGV) ||
    !defined($MXPOST)) {
  print "syntax: make-pos-en.mxpost.perl -mxpost INSTALL_DIR IN_FILE OUT_FILE\n";
  exit(1);
}

# Replace every character outside \x20-\x7f with '?' before tagging.
# chomp (not chop) so that a final line without a trailing newline does not
# lose its last real character.
my $ascii_filter = q{perl -ne 'chomp; tr/\x20-\x7f/?/c; print $_."\n";'};

# NOTE(review): `tee debug` leaves a copy of the filtered input in a file
# named "debug" in the current working directory; kept because the original
# script did so, but it looks like a debugging leftover.
my $tagger_cmd = join(' | ',
  shell_quote("$RealBin/../../tokenizer/deescape-special-chars.perl")
    . ' < ' . shell_quote($IN),
  $ascii_filter,
  'tee debug',
  shell_quote("$MXPOST/mxpost") . ' ' . shell_quote("$MXPOST/tagger.project"));

my $escape_cmd = shell_quote("$RealBin/../../tokenizer/escape-special-chars.perl")
  . ' > ' . shell_quote($OUT);

open(my $tagger, '-|', $tagger_cmd)
  or die "cannot start tagger pipeline '$tagger_cmd': $!\n";
open(my $out, '|-', $escape_cmd)
  or die "cannot start output pipeline '$escape_cmd': $!\n";

while (my $line = <$tagger>) {
  foreach my $token (split ' ', $line) {
    my $word_pos = $token;

    # The tagger separates word and tag with the last '/'; turn that
    # separator into '_' and remember what followed it.
    my $slash_tag;
    $slash_tag = $1 if $word_pos =~ s/\/([^\/]+)$/_$1/;

    # A lone "//" token carries no tag: treat it as word "//" with tag ":".
    $word_pos = "//_:" if $word_pos eq "//";

    my $tag;
    if ($word_pos =~ /^.+_([^_]+)$/) {
      $tag = $1;
    }
    else {
      print STDERR "faulty POS tag: $word_pos\n";
      # Never print a capture left over from an earlier match: use the
      # text after the slash when there was one, else the placeholder.
      $tag = defined($slash_tag) ? $slash_tag : $FALLBACK_TAG;
    }
    print {$out} "$tag ";
  }
  print {$out} "\n";
}

# close() on a pipe waits for the child and returns false when it exited
# with a non-zero status, so failures of either pipeline are reported here.
my $failed = 0;
unless (close($out)) {
  warn "output pipeline failed: " . ($! ? $! : "exit status $?") . "\n";
  $failed = 1;
}
unless (close($tagger)) {
  warn "tagger pipeline failed: " . ($! ? $! : "exit status $?") . "\n";
  $failed = 1;
}
exit(1) if $failed;

# Returns its argument wrapped in single quotes so that a POSIX shell treats
# it as one literal word, whatever spaces or metacharacters it contains.
sub shell_quote {
  my ($text) = @_;
  $text =~ s/'/'\\''/g;
  return "'$text'";
}