#!/usr/bin/env perl # # This file is part of moses. Its use is licensed under the GNU Lesser General # Public License version 2.1 or, at your option, any later version. use warnings; use strict; my ($lowercase,$cluster_file,$in,$out,$tmp) = @ARGV; my $CLUSTER = &read_cluster_from_mkcls($cluster_file); # is $lowercase a script? if ($lowercase =~ /\//) { open(IN,"$lowercase < $in|") || die("ERROR: could not open input"); $lowercase = 0; } else { open(IN,$in) || die("ERROR: could not open input"); } binmode(IN, ":utf8"); open(OUT,">$out"); binmode(OUT, ":utf8"); while() { chop; s/\s+/ /g; s/^ //; s/ $//; my $first = 1; foreach my $word (split) { # if lowercase is a flag if ($lowercase) { $word = lc($word); } my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "0"; print OUT " " unless $first; print OUT $cluster; $first = 0; } print OUT "\n"; } close(OUT); close(IN); sub read_cluster_from_mkcls { my ($file) = @_; my %CLUSTER; open(CLUSTER_FILE,$file) || die("ERROR: could not open cluster file '$file'"); binmode(CLUSTER_FILE, ":utf8"); while() { chop; my ($word,$cluster) = split; $CLUSTER{$word} = $cluster; } close(CLUSTER_FILE); return \%CLUSTER; } sub add_cluster_to_string { }