# Convert bracketed parse trees read from STDIN (one tree per line, in
# "(LABEL child child ...)" form) into inline XML of the form
#   <tree label="LABEL"> ... </tree>
# written to STDOUT, one line of output per line of input.
#
# Characters in the input that would clash with the XML markup or with
# other reserved symbols are replaced by entities before the brackets are
# turned into tags.

use warnings;
use strict;

while (<STDIN>) {
    # An empty tree "(())" yields an empty output line (presumably a
    # failed parse -- confirm against the producer of the input).
    if (/^\(\(\)\)/) {
        print "\n";
        next;
    }

    # Normalise the root: both "( ( ... ) )" and "( ..." become "(TOP ...".
    s/^\( \( (.+) \)$/(TOP $1/;
    s/^\( /(TOP /;

    # Escape special characters. '&' must be handled first so that the
    # ampersands introduced by the later replacements are not re-escaped.
    s/&/&amp;/g;     # escape the escape character
    s/\|/&bar;/g;    # vertical bar
    s/</&lt;/g;      # xml
    s/>/&gt;/g;      # xml
    s/'/&apos;/g;    # xml
    s/"/&quot;/g;    # xml
    s/\[/&#91;/g;    # syntax non-terminal
    s/\]/&#93;/g;    # syntax non-terminal

    # Protect parentheses that are tokens of the input text, i.e. a leaf
    # of the form "(LABEL ()" or "(LABEL ))", so the bracket-to-tag
    # conversion below does not treat them as tree structure.
    s/(\(\S+ )\(\)/${1}&openingparenthesis;)/g;
    s/(\(\S+ )\)\)/${1}&closingparenthesis;)/g;

    # Convert brackets into XML tree tags.
    s/\((\S+) /<tree label="$1"> /g;
    s/\)/ <\/tree> /g;

    # -LRB- / -RRB- used as labels lose their dashes; used as tokens they
    # become literal parentheses.
    s/"-LRB-"/"LRB"/g;
    s/"-RRB-"/"RRB"/g;
    s/-LRB-/(/g;
    s/-RRB-/)/g;

    # Collapse runs of spaces and drop the space left before the newline.
    s/ +/ /g;
    s/ $//g;

    # Restore the parentheses that were protected above.
    s/&openingparenthesis;/(/g;
    s/&closingparenthesis;/)/g;

    print $_;
}