File size: 1,300 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use strict;
use warnings;
use Getopt::Std;
# Extract "singletons" from a word-aligned parallel corpus: source words that
# take part in exactly one alignment link in the whole corpus, where that one
# link is one-to-one and joins two string-identical tokens.
#
# Usage: extract-singletons.perl [-q] target source align
#   target, source  parallel text files, one whitespace-tokenized sentence
#                   per line
#   align           one line per sentence pair: a whitespace-separated flat
#                   list of 0-based token indices, read two at a time as
#                   "target_index source_index"
#   -q              do not print the progress counter to STDERR
#
# Output (STDOUT): the qualifying words, sorted, one per line.

our $opt_q;    # package variable filled in by getopts('q')
getopts('q');

my ($target, $source, $align) = @ARGV;
# Test definedness rather than truth so that a file named "0" is accepted.
die "
Usage: extract-singletons.perl target source align
" unless defined $align;

# Three-argument open with lexical handles: the names are always treated as
# plain file names, never as a mode prefix or a command to run.
open(my $target_fh, '<', $target)
    or die "Error: unable to open target file \"$target\"!\n";
open(my $source_fh, '<', $source)
    or die "Error: unable to open source file \"$source\"!\n";
open(my $align_fh, '<', $align)
    or die "Error: unable to open alignment file \"$align\"!\n";

# Per source word: +1 for every "clean" link (see below), +2 for any other
# link. A final total of exactly 1 therefore means "one link, and it was clean".
my %count;
my $line_no = 0;

while (my $target_line = <$target_fh>) {
    $line_no++;
    print STDERR "\r$line_no" if !defined $opt_q && $line_no % 1000 == 0;

    # The three files are read in lock-step; running out of source or
    # alignment lines means the inputs do not belong together.
    my $source_line = <$source_fh>;
    defined $source_line
        or die "Error: source file \"$source\" has fewer lines than target file \"$target\"!\n";
    my $align_line = <$align_fh>;
    defined $align_line
        or die "Error: alignment file \"$align\" has fewer lines than target file \"$target\"!\n";

    my @target_words = split ' ', $target_line;
    my @source_words = split ' ', $source_line;
    my @links        = split ' ', $align_line;

    @links % 2 == 0
        or die "Error: odd number of alignment indices at line $line_no of \"$align\"!\n";

    # First pass: validate every index and count the links per token.
    my (@target_links, @source_links);
    for (my $i = 0; $i < $#links; $i += 2) {
        my ($t, $s) = @links[$i, $i + 1];
        for my $index ($t, $s) {
            $index =~ /\A[0-9]+\z/
                or die "Error: invalid alignment index \"$index\" at line $line_no of \"$align\"!\n";
        }
        $t < @target_words && $s < @source_words
            or die "Error: alignment point $t $s is outside the sentence at line $line_no of \"$align\"!\n";
        $target_links[$t]++;
        $source_links[$s]++;
    }

    # Second pass: a link is "clean" when both of its tokens have no other
    # link in this sentence pair and the two tokens are the same string.
    for (my $i = 0; $i < $#links; $i += 2) {
        my ($t, $s) = @links[$i, $i + 1];
        my $word     = $source_words[$s];
        my $is_clean = $target_links[$t] == 1
            && $source_links[$s] == 1
            && $target_words[$t] eq $word;
        $count{$word} += $is_clean ? 1 : 2;
    }
}

close($target_fh);
close($source_fh);
close($align_fh);

for my $word (sort keys %count) {
    print "$word\n" if $count{$word} == 1;
}
|