# csh/tcsh script.
#
# For every line of the tagged text, append the token ",/RD_PUNC" after each
# word whose tag occurs (as a whole word) in the pos-list file, write the
# resulting line to the scratch file ./plain_line and run
# ./scripts/detagging on that file.
#
# Arguments:
#   $1  tagged text; words are blank-separated and written as word/TAG
#   $2  pos list; file holding the tags that are followed by ",/RD_PUNC"
#
# Side effects: ./plain_line in the current directory is overwritten once per
# input line and is left behind holding the last processed line.
# Exit status: 255 (exit(-1)) on a usage error or an unreadable input file.

if ($# != 2) then
    echo "argument 1 = tagged_text"
    echo "argument 2 = pos list"
    exit(-1)
endif

# Fail early on unreadable inputs so the line count below is always numeric.
foreach input_file ("$1" "$2")
    if (! -r "$input_file") then
        echo "cannot read $input_file" > /dev/stderr
        exit(-1)
    endif
end

# The text is data: tokens such as "?/..." or "*" must never be treated as
# filename patterns (csh would abort with "No match" or expand them).
set noglob

# NR also counts a final line that has no trailing newline, which wc -l misses.
set lines_text = `awk 'END {print NR}' "$1"`
set k = 1
while ($k <= $lines_text)
    # Command substitution splits line k into its blank-separated words.
    set words = `sed -n "${k}p" "$1"`

    # Rebuild the line word by word so the marker is placed directly after the
    # word that triggered it, even when the same word occurs more than once or
    # contains characters that are special to sed.
    set marked = ""
    set sep = ""
    foreach word ($words:q)
        set marked = "${marked}${sep}${word}"
        set sep = " "

        # The tag is the second "/"-separated field; cut returns the whole
        # word when it contains no "/".
        set tag = `echo "$word" | cut -d "/" -f2`

        # An empty tag (e.g. "word/") yields an empty list; skip it, because
        # an empty pattern would match every line of the pos list.
        if ($#tag != 0) then
            # -F: compare the tag literally; -w: whole-word match;
            # -e: keeps a tag that starts with "-" from being read as an option.
            grep -q -w -F -e "$tag" "$2"
            if ($status == 0) then
                set marked = "${marked} ,/RD_PUNC"
            endif
        endif
    end

    echo "$marked" > plain_line
    ./scripts/detagging plain_line
    @ k++
end