|
|
|
|
|
|
|
|
|
|
|
use warnings;
use strict;

# Convert one line of bracketed parser output, e.g.
#   ( (S (NP (NN dog)) (VP (VBZ barks))) )
# into tree markup of the form
#   <tree label="TOP"> <tree label="S"> ... </tree> </tree>
#
# Takes the raw input line (including its trailing newline, if any) and
# returns the converted line.  A line starting with "(())" (an empty parse,
# presumably a parse failure) is returned as a bare newline.
sub convert_parse_line {
    my ($line) = @_;

    # empty parse: emit an empty line so output stays line-aligned with input
    return "\n" if $line =~ /^\(\(\)\)/;

    # prep: name the anonymous root node TOP
    # (doubly wrapped form "( ( ... ) )" first, then the plain "( ..." form)
    $line =~ s/^\( \( (.+) \)$/\(TOP $1/;
    $line =~ s/^\( /\(TOP /;

    # escape words; '&' must go first so that the entities introduced by the
    # following substitutions are not escaped a second time
    $line =~ s/\&/\&amp;/g;     # escape the escape character
    $line =~ s/\|/\&bar;/g;     # '|' is presumably reserved downstream (factor separator) - confirm
    $line =~ s/\</\&lt;/g;      # xml
    $line =~ s/\>/\&gt;/g;      # xml
    $line =~ s/\'/\&apos;/g;    # xml
    $line =~ s/\"/\&quot;/g;    # xml (also keeps text quotes apart from label="..." quotes below)
    $line =~ s/\[/\&#91;/g;     # syntax non-terminal
    $line =~ s/\]/\&#93;/g;     # syntax non-terminal

    # escape parentheses that were part of the input text: a node whose
    # token is a literal "(" or ")" would otherwise be read as tree structure
    $line =~ s/(\(\S+ )\(\)/$1\&openingparenthesis;\)/g;
    $line =~ s/(\(\S+ )\)\)/$1\&closingparenthesis;\)/g;

    # convert into tree
    $line =~ s/\((\S+) /<tree label=\"$1\"> /g;
    $line =~ s/\)/ <\/tree> /g;
    $line =~ s/\"\-LRB\-\"/\"LRB\"/g;   # labels
    $line =~ s/\"\-RRB\-\"/\"RRB\"/g;
    $line =~ s/\-LRB\-/\(/g;            # tokens
    $line =~ s/\-RRB\-/\)/g;
    $line =~ s/ +/ /g;                  # collapse runs of spaces
    $line =~ s/ $//g;                   # drop the space left before the line end

    # de-escape parentheses that were part of the input text
    $line =~ s/\&openingparenthesis;/\(/g;
    $line =~ s/\&closingparenthesis;/\)/g;

    return $line;
}

# main: filter STDIN to STDOUT, one parse per line
while (my $line = <STDIN>) {
    print convert_parse_line($line);
}
|
|