File size: 1,373 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
use Getopt::Long "GetOptions";
# Splits hyphenated (or, with --slash, slash-joined) words inside an XML-style
# parse tree read from STDIN, one sentence per line, and writes the result to
# STDOUT.  A leaf such as
#
#     <tree label="NN"> well-known </tree>
#
# becomes
#
#     <tree label="NN"> <tree label="NN"> well </tree>
#         <tree label="HYP"> @-@ </tree> <tree label="NN"> known </tree> </tree>
#
# (shown wrapped; the output stays on one line).
#
# Command-line switches:
#   --binarize    nest the pieces of a split word in intermediate "@POS" nodes
#   --mark-split  prefix the label of the enclosing node with HYP- / SLASH-
#   --slash       split on "/" instead of "-"
my $MARK_HYP = 0;
my $BINARIZE = 0;
my $SLASH = 0;

GetOptions(
    'binarize'   => \$BINARIZE,
    'mark-split' => \$MARK_HYP,
    'slash'      => \$SLASH,
) or die "usage: $0 [--binarize] [--mark-split] [--slash] < trees > trees\n";

my $punc = $SLASH ? "/" : "-";

# \p{IsAlnum} can only recognise non-ASCII letters once the input bytes have
# been decoded into characters; the input is expected to be UTF-8.
binmode(STDIN,  ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');

while (my $line = <STDIN>) {
    print _split_line($line, $punc, $MARK_HYP, $BINARIZE) . "\n";
}

# Rewrites one line of tree markup, splitting every word that contains the
# separator between two alphanumeric characters.
#
# Parameters:
#   $line      the input line; a trailing newline is optional and is dropped
#              together with all other surrounding whitespace
#   $punc      the separator character, "-" or "/"
#   $mark_hyp  true: prefix the label of the enclosing node with HYP-/SLASH-
#   $binarize  true: wrap the pieces in nested "@POS" nodes
#
# Returns the rewritten line as a single space-joined string without a
# trailing newline.
sub _split_line {
    my ($line, $punc, $mark_hyp, $binarize) = @_;

    my $is_slash = $punc eq "/";
    my $mark     = $is_slash ? "SLASH-" : "HYP-";
    my $punc_pos = $is_slash ? "SLASH"  : "HYP";
    my $joiner   = "\@$punc\@";

    # The right-hand character is only looked at, not consumed, so that it can
    # serve as the left-hand character of the next separator ("a-b-c").
    my $boundary = qr/(\p{IsAlnum})\Q$punc\E(?=\p{IsAlnum})/;

    my @out;
    for my $token (split ' ', $line) {

        # Markup tokens and words without an inner separator pass through.
        if ($token =~ /^</ || $token =~ />$/ || $token !~ $boundary) {
            push @out, $token;
            next;
        }

        # The part-of-speech label comes from the token emitted just before
        # the word (normally the 'label="..."' half of the opening tag).
        # Without one there is nothing sensible to label the pieces with, so
        # the word is left untouched rather than given a made-up label.
        my ($pos) = @out ? $out[-1] =~ /label="([^"]+)"/ : ();
        if (!defined $pos) {
            push @out, $token;
            next;
        }

        (my $spaced = $token) =~ s/$boundary/$1 $joiner /g;
        my @words = split ' ', $spaced;

        $out[-1] =~ s/label="/label="$mark/ if $mark_hyp;

        if ($binarize) {
            # One intermediate node per piece beyond the first two; each is
            # closed again just before the piece that follows it.
            my $inner = '<tree label="@' . ($mark_hyp ? $mark : '') . $pos . '">';
            push @out, ($inner) x (@words - 2);
        }

        for my $i (0 .. $#words) {
            push @out, "</tree>" if $binarize && $i >= 2;
            my $label = $words[$i] eq $joiner ? $punc_pos : $pos;
            push @out, "<tree label=\"$label\"> $words[$i] </tree>";
        }
    }

    return join(" ", @out);
}
|