#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#
# Splits words joined by a hyphen (or, with --slash, by a slash) inside a
# tree-annotated corpus read from STDIN, and writes the result to STDOUT.
#
# Each input line is split on whitespace.  Tokens that look like markup
# (start with "<" or end with ">") are copied through untouched.  Any other
# token containing the joiner between two alphanumeric characters is broken
# into its parts, with the joiner rewritten as "@-@" (or "@/@"); every part
# becomes its own <tree label="..."> leaf.  The label for the parts is taken
# from the label="..." attribute of the token emitted just before the word.
#
# Options:
#   --slash       split on "/" instead of "-" (labels SLASH / SLASH-).
#   --mark-split  prefix the label of the enclosing tag with HYP- (or SLASH-).
#   --binarize    wrap the leaves in nested <tree label="@POS"> nodes so that
#                 no node receives more than two new children.
#
# NOTE(review): the <tree ...> / </tree> literals, the loop bounds and the
# markup test were restored from a copy of this script in which angle-bracket
# spans had been lost -- confirm against the upstream Moses script.

use warnings;
use strict;
use Getopt::Long "GetOptions";

# Entry point: parse the command line, then filter STDIN to STDOUT line by line.
sub main {
    my $MARK_HYP = 0;
    my $BINARIZE = 0;
    my $SLASH    = 0;

    GetOptions(
        'binarize'   => \$BINARIZE,
        'mark-split' => \$MARK_HYP,
        'slash'      => \$SLASH,
    ) or die "usage: $0 [--binarize] [--mark-split] [--slash] < in > out\n";

    # \p{IsAlnum} only classifies non-ASCII letters correctly on decoded
    # text, so read and write UTF-8 rather than raw bytes.
    binmode STDIN,  ':encoding(UTF-8)';
    binmode STDOUT, ':encoding(UTF-8)';

    while ( my $line = <STDIN> ) {
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp $line;
        print split_line( $line, $SLASH, $MARK_HYP, $BINARIZE ), "\n";
    }
    return;
}

# Rewrite one line of the corpus.
#
#   $line       the input line, without its trailing newline
#   $slash      true: split on "/"; false: split on "-"
#   $mark_split true: prefix the enclosing tag's label with HYP-/SLASH-
#   $binarize   true: add intermediate "@POS" nodes around the leaves
#
# Returns the rewritten line (tokens joined by single spaces, no newline).
sub split_line {
    my ( $line, $slash, $mark_split, $binarize ) = @_;

    my $punc      = $slash ? '/'      : '-';
    my $mark      = $slash ? 'SLASH-' : 'HYP-';
    my $punc_pos  = $slash ? 'SLASH'  : 'HYP';
    my $separator = "\@$punc\@";

    my @out;
    for my $token ( split ' ', $line ) {

        # Markup is never split, so labels such as "NP-SBJ" stay intact.
        if ( $token =~ /^</ || $token =~ />$/ ) {
            push @out, $token;
            next;
        }

        # Lookarounds (instead of consuming the neighbouring characters)
        # let adjacent joiners share a character, so "a-b-c" is split at
        # both hyphens.
        ( my $spaced = $token )
            =~ s/(?<=\p{IsAlnum})\Q$punc\E(?=\p{IsAlnum})/ $separator /g;
        my @words = split ' ', $spaced;

        # The label comes from the token emitted just before this word.
        # Capture it in list context so that a failed match yields undef
        # rather than a stale $1 left over from the substitution above.
        my ($pos) = @out ? $out[-1] =~ /label="([^"]+)"/ : ();

        # Nothing to split, or no label to attach to the pieces: keep the
        # token exactly as it was.
        if ( @words < 2 || !defined $pos ) {
            push @out, $token;
            next;
        }

        # $pos was captured before this edit, so the leaves keep the
        # unmarked label while the enclosing tag gets the prefix.
        $out[-1] =~ s/label="/label="$mark/ if $mark_split;

        if ($binarize) {
            # One opening "@POS" node for every word beyond the second;
            # each is closed again in the loop below.
            my $inner = '<tree label="@'
                . ( $mark_split ? $mark : '' )
                . $pos . '">';
            push @out, ($inner) x ( @words - 2 );
        }

        for my $i ( 0 .. $#words ) {
            push @out, '</tree>' if $binarize && $i >= 2;
            my $label = $words[$i] eq $separator ? $punc_pos : $pos;
            push @out, qq{<tree label="$label"> $words[$i] </tree>};
        }
    }

    return join ' ', @out;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;