#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

# Reads text from STDIN line by line, normalises whitespace, and escapes the
# characters that have a special meaning to moses (factor separator, XML
# markup, syntax non-terminal brackets) as XML/numeric entities.  The result
# is written to STDOUT, one output line per input line.
#
# Options:
#   -b   unbuffered output (flush STDOUT after each line)

# escape_special_chars($line)
#
# Returns a cleaned and escaped copy of $line:
#   * ASCII control characters (including tab, CR and LF) are deleted,
#   * runs of whitespace collapse to one space and the line is trimmed,
#   * & | < > ' " [ ] are replaced by entities,
#   * XML instructions of the form  <tag translation="..."> text </tag>
#     are turned back into real markup after the escaping step.
sub escape_special_chars {
    my ($line) = @_;

    # avoid general madness
    $line =~ s/[\000-\037]//g;
    $line =~ s/\s+/ /g;
    $line =~ s/^ //g;
    $line =~ s/ $//g;

    # special characters in moses
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the following rules would be escaped a second time.
    $line =~ s/\&/\&amp;/g;     # escape escape
    $line =~ s/\|/\&#124;/g;    # factor separator
    $line =~ s/\</\&lt;/g;      # xml
    $line =~ s/\>/\&gt;/g;      # xml
    $line =~ s/\'/\&apos;/g;    # xml
    $line =~ s/\"/\&quot;/g;    # xml
    $line =~ s/\[/\&#91;/g;     # syntax non-terminal
    $line =~ s/\]/\&#93;/g;     # syntax non-terminal

    # restore xml instructions
    # At this point the markup is already escaped, so the pattern has to
    # match the entity-encoded form and emit the literal one.
    $line =~ s/\&lt;(\S+) translation=&quot;(.+?)&quot;&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;

    return $line;
}

# Command-line entry point: parse options, then filter STDIN to STDOUT.
sub main {
    # Consume all arguments; only -b is recognised, anything else is ignored.
    while (defined(my $arg = shift @ARGV)) {
        $| = 1 if $arg =~ /^-b$/;    # not buffered (flush each line)
    }

    while (my $line = <STDIN>) {
        # chomp (not chop): a final line without a trailing newline must
        # not lose its last character.
        chomp $line;
        print escape_special_chars($line) . "\n";
    }
    return;
}

# Run the filter only when executed as a script, so the file can also be
# loaded (require/do) without touching STDIN.
main() unless caller;

1;