File size: 1,706 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use warnings;
use strict;

# Prune a reordering table so that it only contains phrase pairs that are
# also present in a translation table (ttable).
#
#   argument: TTABLE  - the translation table, plain or gzipped (name ends in "gz")
#   STDIN:    the reordering table
#   STDOUT:   the reordering table lines whose phrase pair occurs in TTABLE
#   STDERR:   an ERROR line if some ttable phrase pair was never found
#
# Both tables are walked in a single forward pass: a reordering line is kept
# only if its phrase pair equals the current ttable phrase pair, and the
# ttable only advances after such a match. The ttable phrase pairs therefore
# have to occur in the reordering table in the same order.
my ($ttable_file) = @ARGV;

die("syntax: remove-orphan-phrase-pairs-from-reordering-table.perl TTABLE < REORDERING_TABLE > REORDERING_TABLE.pruned")
    unless defined($ttable_file) && -e $ttable_file;

my $ttable_fh;
if ($ttable_file =~ /gz$/) {
  # list-form pipe open: the file name is handed to zcat as one argument and
  # never passes through a shell (safe for spaces and shell metacharacters)
  open($ttable_fh, '-|', 'zcat', $ttable_file)
    or die("ERROR: could not run zcat on $ttable_file: $!");
}
else {
  open($ttable_fh, '<', $ttable_file)
    or die("ERROR: could not open $ttable_file: $!");
}

# get first ttable line to be matched
my $ttable_line = <$ttable_fh>;
my $ttable_phrase_pair;

# an empty ttable has nothing to match: skip the loop, output stays empty
if (defined($ttable_line)) {
  $ttable_phrase_pair = get_phrase_pair($ttable_line);

  # loop through reordering table
  while(my $reordering_line = <STDIN>) {
    # orphan: phrase pair is not the one the ttable expects next, drop it
    next if get_phrase_pair($reordering_line) ne $ttable_phrase_pair;

    # print matched line
    print $reordering_line;

    # read next ttable line to be matched
    $ttable_line = <$ttable_fh>;
    last if !defined($ttable_line); # end of file, done
    $ttable_phrase_pair = get_phrase_pair($ttable_line);
  }
}

# remember whether the ttable was read to its end before closing it:
# only then does a failing close point at a real read problem (closing
# the zcat pipe early makes zcat exit abnormally, which is expected)
my $ttable_exhausted = !defined($ttable_line);
if (!close($ttable_fh) && $ttable_exhausted) {
  print STDERR "ERROR: problem reading $ttable_file: ".($! ? $! : "exit status $?")."\n";
}

# the reordering table ran out while a ttable phrase pair was still pending
if (defined($ttable_line)) {
  # strip the newline so that the message stays on one line
  chomp(my $missing_line = $ttable_line);
  print STDERR "ERROR: trailing ttable lines -> could not find $missing_line!\n";
}

# Extract the key that identifies the phrase pair of one table line.
#
# Table lines consist of fields separated by " ||| "; the first two fields
# are the source phrase and the target phrase, everything after them is
# ignored here.
#
#   $line   - one table line, with or without its trailing newline
#   returns - the string "SOURCE ||| TARGET"; a field that is missing
#             (malformed or undefined line) is treated as the empty string
sub get_phrase_pair {
  my ($line) = @_;
  $line = '' unless defined($line);

  # drop the line terminator, so that a line consisting of just two fields
  # gives the same key as a line with further fields after the target phrase
  chomp($line);

  # limit of 3: stop splitting right after the target phrase
  my ($src,$tgt) = split(/ \|\|\| /,$line,3);
  $src = '' unless defined($src);
  $tgt = '' unless defined($tgt);
  return "$src ||| $tgt";
}