|
#!/bin/bash |
|
|
|
|
|
|
|
|
|
|
|
cleanup() |
|
{ |
|
if [ -e $fifo ] ; then rm $fifo; fi |
|
if [ -e $fifo.inv ] ; then rm $fifo.inv; fi |
|
if [ -e $fifo.o ] ; then rm $fifo.o; fi |
|
} |
|
|
|
usage() |
|
{ |
|
echo |
|
echo "$0: wrapper script to extract phrases from word-aligned corpus" |
|
echo -e "usage:\n $0 <extractor> <ibase> <L1tag> <L2tag> [-x] " |
|
echo "options:" |
|
echo "-l: maximum phrase length ($plen)" |
|
echo "-m: distortion model specification" |
|
echo "-o: base name for output files .fwd.gz .bwd.gz [.<dmodel>.dst.gz]" |
|
echo "-x: (no argument) don't create .fwd.gz and .bwd.gz" |
|
echo |
|
echo "required input files: <ibase>.<L1tag>.gz ibase.<L2tag>.gz ibase.<aln>.gz" |
|
} |
|
|
|
plen=7 |
|
nottable= |
|
dmodel= |
|
dspec= |
|
pargs= |
|
sfactors= |
|
tfactors= |
|
while [ $# -gt 0 ]; do |
|
case $1 in |
|
-l*) plen=${1#-l} |
|
plen=${plen#=} |
|
if [ -z $plen ] ; then |
|
shift |
|
plen=$1 |
|
fi |
|
;; |
|
-m*) dmodel=${1#-m} |
|
dmodel=${dmodel#=} |
|
if [ -z $dmodel ] ; then |
|
shift |
|
dmodel="$1" |
|
fi |
|
;; |
|
-o*) obase=${1#-o} |
|
obase=${obase#=} |
|
if [ -z $obase ] ; then |
|
shift |
|
obase=$1 |
|
fi |
|
;; |
|
-s*) sfactors=${1#-s} |
|
sfactors=${sfactors#=} |
|
if [ -z $sfactors ] ; then |
|
shift |
|
sfactors = $1 |
|
fi |
|
;; |
|
-t*) tfactors=${1#-t} |
|
tfactors=${tfactors#=} |
|
if [ -z $tfactors ] ; then |
|
shift |
|
sfactors = $1 |
|
fi |
|
;; |
|
-x) nottable=1;; |
|
-h) usage; exit 0;; |
|
*) pargs=(${pargs[*]} $1);; |
|
esac |
|
shift |
|
done |
|
|
|
if [ -n "$sfactors" ] || [ -n "$tfactors" ] ; then |
|
echo "Factor filtering is not implemented yet!" |
|
exit 2 |
|
fi |
|
|
|
extract=${pargs[0]} |
|
ibase=${pargs[1]} |
|
L1tag=${pargs[2]} |
|
L2tag=${pargs[3]} |
|
obase=${obase:=$ibase} |
|
|
|
fifo=$obase.$$ |
|
trap 'cleanup' 0 |
|
|
|
export LC_ALL=C |
|
if [ -z "$nottable" ] ; then |
|
mkfifo $fifo; sort -S 5G < $fifo | gzip > $obase.fwd.gz & |
|
mkfifo $fifo.inv; sort -S 5G < $fifo.inv | gzip > $obase.bwd.gz & |
|
fi |
|
if [ -n "$dmodel" ] ; then |
|
mkfifo $fifo.o |
|
sort -S 5G < $fifo.o | gzip > $obase.dst.gz & |
|
dspec="orientation --model " |
|
dspec+=`echo $dmodel | perl -pe 's/((hier|phrase|wbe)-(msd|msrl|mono)).*/$1/;'` |
|
fi |
|
|
|
txt1=${ibase}.${L1tag}.gz |
|
txt2=${ibase}.${L2tag}.gz |
|
aln=${ibase}.aln.gz |
|
echo "($extract <(zcat -f $txt1) <(zcat -f $txt2) <(zcat -f $aln) $fifo $plen $dspec) || exit 1" |
|
($extract <(zcat -f $txt2) <(zcat -f $txt1) <(zcat -f $aln) $fifo $plen $dspec) || exit 1 |
|
wait |
|
|