File size: 1,300 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use strict;
use warnings;
use Getopt::Std;

# Reads three line-parallel files (target sentences, source sentences, word
# alignments) and prints, sorted and one per line, every source word whose
# only aligned occurrence in the whole corpus is a one-to-one link to an
# identical target word.
#
# Each alignment line is a whitespace-separated list of index pairs: the first
# number of a pair is a 0-based position in the target sentence, the second a
# 0-based position in the source sentence.
#
# Options:
#   -q   do not print the progress counter to STDERR

my %opt;
getopts('q', \%opt);

my $target = shift;
my $source = shift;
my $align = shift or die "
Usage: extract-singletons.perl target source align

";

# Three-argument open so that a file name can never be interpreted as a mode
# or a pipe command.
open(my $target_fh, '<', $target)
    or die "Error: unable to open target file \"$target\"!\n";
open(my $source_fh, '<', $source)
    or die "Error: unable to open source file \"$source\"!\n";
open(my $align_fh, '<', $align)
    or die "Error: unable to open alignment file \"$align\"!\n";

# Per source word: +1 for every "clean" occurrence (one-to-one link to an
# identical target word), +2 for any other aligned occurrence.  A final value
# of exactly 1 therefore means "one clean occurrence and nothing else".
my %count;
my %warned_short;    # files already reported as shorter than the target file
my $line_no = 0;

while (my $target_line = <$target_fh>) {
    $line_no++;
    if (!$opt{q} && $line_no % 1000 == 0) {
        print STDERR "\r$line_no";
    }

    my @T = split ' ', $target_line;
    my @S = split ' ', next_line_or_empty($source_fh, 'source');
    my @A = split ' ', next_line_or_empty($align_fh,  'alignment');

    # Count how many links touch each target / source position.  The loop
    # bound "$i < $#A" visits complete pairs only, so a dangling trailing
    # index on a malformed line is ignored instead of being read as a link
    # to position 0.
    my (@source_links, @target_links);
    for (my $i = 0; $i < $#A; $i += 2) {
        $target_links[ $A[$i] ]++;
        $source_links[ $A[$i + 1] ]++;
    }

    for (my $i = 0; $i < $#A; $i += 2) {
        my ($t_idx, $s_idx) = @A[ $i, $i + 1 ];

        # A link that points past the end of the source sentence names no
        # word at all; skipping it keeps the empty string out of %count.
        my $s_word = $S[$s_idx];
        next unless defined $s_word;

        my $t_word = $T[$t_idx];
        if (   $target_links[$t_idx] == 1
            && $source_links[$s_idx] == 1
            && defined $t_word
            && $t_word eq $s_word)
        {
            $count{$s_word}++;       # Print this if it only occurs here
        }
        else {
            $count{$s_word} += 2;    # Don't print this
        }
    }
}

close($target_fh);
close($source_fh);
close($align_fh);

foreach my $w (sort keys %count) {
    print "$w\n" if $count{$w} == 1;
}

# Returns the next line from $fh, or the empty string once $fh is exhausted.
# The first time a given file runs out, a single warning naming it ($label)
# is written to STDERR.
sub next_line_or_empty {
    my ($fh, $label) = @_;
    my $line = <$fh>;
    return $line if defined $line;
    warn "Warning: $label file has fewer lines than the target file\n"
        unless $warned_short{$label}++;
    return '';
}