File size: 1,706 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
# Filter the reordering table read from STDIN down to the lines whose phrase
# pair also occurs in TTABLE, and write the surviving lines to STDOUT.
#
# Both inputs are consumed in a single forward pass: the script waits for the
# current ttable phrase pair to show up in the reordering table, prints that
# line, then advances to the next ttable line. A ttable pair that never shows
# up (or shows up out of order) therefore stalls the matching, which is
# reported on STDERR at the end.
my ($ttable_file) = @ARGV;
die("syntax: remove-orphan-phrase-pairs-from-reordering-table.perl TTABLE < REORDERING_TABLE > REORDERING_TABLE.pruned")
    unless defined($ttable_file) && -e $ttable_file;

# Explicit open modes keep special characters in the file name from being
# interpreted as a mode or a shell command; the list form of the pipe open
# passes the name to zcat without going through a shell. A failed open is
# fatal instead of silently yielding an empty pruned table.
my $ttable_fh;
if ($ttable_file =~ /gz$/) {
    open($ttable_fh, '-|', 'zcat', $ttable_file)
        or die("ERROR: could not start zcat for $ttable_file: $!\n");
}
else {
    open($ttable_fh, '<', $ttable_file)
        or die("ERROR: could not open $ttable_file: $!\n");
}

# get first ttable line to be matched
my $ttable_line = <$ttable_fh>;

# An empty ttable has no phrase pair to wait for, so no line can be kept.
if (defined($ttable_line)) {
    my $ttable_phrase_pair = get_phrase_pair($ttable_line);

    # loop through reordering table
    REORDERING_LINE:
    while (my $reordering_line = <STDIN>) {
        # skip reordering lines until one matches the awaited ttable line
        next REORDERING_LINE
            if get_phrase_pair($reordering_line) ne $ttable_phrase_pair;

        # print matched line
        print $reordering_line;

        # read next ttable line to be matched
        $ttable_line = <$ttable_fh>;
        last REORDERING_LINE if !defined($ttable_line); # end of file, done
        $ttable_phrase_pair = get_phrase_pair($ttable_line);
    }
}

if (defined($ttable_line)) {
    # STDIN ran out while a ttable line was still waiting for its match
    print STDERR "ERROR: trailing ttable lines -> could not find $ttable_line!\n";
}
elsif (!close($ttable_fh)) {
    # The ttable was read to its end; for the zcat pipe a failing close
    # means the decompressor exited with an error status.
    print STDERR "ERROR: problem while reading $ttable_file (close failed: $! / status $?)\n";
}
# Build the comparison key for one table line: its first two columns, where
# columns are separated by " ||| ", joined back together with that same
# separator. Everything after the second column is ignored.
sub get_phrase_pair {
    my $table_line = shift;

    # A LIMIT of 3 leaves the rest of the line unsplit in a third element,
    # which is simply dropped by the two-variable assignment.
    my ($source_side, $target_side) = split(/ \|\|\| /, $table_line, 3);

    return join(' ||| ', $source_side, $target_side);
}
|