#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# Converts bracketed parse trees (one tree per line on STDIN) into the
# XML-style <tree label="..."> ... </tree> markup, written to STDOUT.
# A line starting with "(())" marks a parse failure and yields an empty line.

use warnings;
use strict;

# Convert one bracketed parse line into <tree> markup.
#
# Parameters:
#   $line - one input line, normally still carrying its trailing newline.
# Returns:
#   the converted line (the trailing newline is kept), or "\n" when the
#   line starts with "(())", i.e. the parser produced no tree.
sub berkeley_to_moses_xml {
    my ($line) = @_;

    # parse failures
    return "\n" if $line =~ m{^\(\(\)\)};

    # prep: give the unlabeled outermost bracket a label
    $line =~ s{^\( }{(TOP };

    # escape words; '&' must go first so that the entities produced by the
    # following substitutions are not escaped a second time
    $line =~ s{&}{&amp;}g;       # escape escape
    $line =~ s{\|}{&bar;}g;      # factor separator
    $line =~ s{<}{&lt;}g;        # xml
    $line =~ s{>}{&gt;}g;        # xml
    $line =~ s{''}{&quot;}g;     # closing double quote written as two apostrophes
    $line =~ s{``}{&quot;}g;     # opening double quote written as two backticks
    $line =~ s{'}{&apos;}g;      # xml
    $line =~ s{"}{&quot;}g;      # xml
    $line =~ s{\[}{&#91;}g;      # syntax non-terminal
    $line =~ s{\]}{&#93;}g;      # syntax non-terminal

    # escape parentheses that were part of the input text, so the generic
    # bracket-to-tag conversion below does not treat them as tree structure
    $line =~ s{(\(\S+ )\(\)}{$1&openingparenthesis;)}g;
    $line =~ s{(\(\S+ )\)\)}{$1&closingparenthesis;)}g;

    # convert into tree
    $line =~ s{\((\S+) }{<tree label="$1"> }g;
    $line =~ s{\)}{ </tree> }g;
    $line =~ s{"-LRB-"}{"LRB"}g;    # labels
    $line =~ s{"-RRB-"}{"RRB"}g;
    $line =~ s{-LRB-}{(}g;          # tokens
    $line =~ s{-RRB-}{)}g;
    $line =~ s{ +}{ }g;             # collapse runs of spaces
    $line =~ s{ $}{}g;              # drop the space left before the line end

    # de-escape parentheses that were part of the input text
    $line =~ s{&openingparenthesis;}{(}g;
    $line =~ s{&closingparenthesis;}{)}g;

    # a quote used as a node label is replaced by a plain-text label
    $line =~ s{tree label="&quot;"}{tree label="QUOT"}g;

    return $line;
}

# Filter STDIN to STDOUT, converting line by line.
sub main {
    while ( my $line = <STDIN> ) {
        print berkeley_to_moses_xml($line);
    }
    return;
}

# Run the filter only when executed as a script, so the conversion routine
# can be loaded (e.g. by a test) without consuming STDIN.
main() unless caller;

1;