File size: 3,786 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
#!/usr/bin/perl -w
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# utility script for deploying decode (may be within tune) jobs over a cluster
# with NFS-mounted drives. copy all the model files to local disk.
use strict;
# Usage: cache-model.perl moses.ini cache-dir [cat-models]
#
# Copies every model file referenced by a "path=" argument of the supported
# feature lines into cache-dir, writes a copy of the config whose paths point
# at the cached files, and prints the name of that cached config on STDOUT.
# If the cached config already exists, the model files are still passed
# through cache_file() (which refreshes their time stamps) but the config is
# not rewritten.
my $CAT_MODELS = 0;

die("ERROR: syntax is cache-model.perl moses.ini cache-dir")
    unless scalar @ARGV >= 2;
my $CONFIG    = $ARGV[0];
my $CACHE_DIR = $ARGV[1];
# optional third argument, handed on to cache_file() as its third parameter
if (scalar(@ARGV) == 3) {
    $CAT_MODELS = $ARGV[2];
}

# create dir (if not already there); the list form of system() keeps the
# shell away from the directory name
system('mkdir', '-p', '--', $CACHE_DIR) == 0
    or die("ERROR: could not create cache dir '$CACHE_DIR'");

# open the original config BEFORE taking the lock, so that a wrong config
# path cannot leave a stale lock file behind that blocks all other jobs
open(my $old_fh, '<', $CONFIG)
    or die("ERROR: could not open config '$CONFIG': $!");

# name for new config file
my $cached_config = $CONFIG;
$cached_config =~ s/\//_/g;
$cached_config = "$CACHE_DIR/$cached_config";
my $config_lock = "$cached_config.lock";

# wait while another process holds the lock
# NOTE: the existence test and the lock creation below are two separate
# steps, so this is an advisory lock only, not an atomic one
while (-e $config_lock) {
    sleep(10);
}

my $just_update_timestamps = (-e $cached_config);
my $new_fh;
unless ($just_update_timestamps) {
    open(my $lock_fh, '>', $config_lock)
        or die("ERROR: could not create lock '$config_lock': $!");
    close($lock_fh);

    if (!open($new_fh, '>', $cached_config)) {
        my $reason = $!;
        unlink($config_lock);    # do not block other jobs forever
        die("ERROR: could not write cached config '$cached_config': $reason");
    }
}

# A feature line that carries a model path. $1 is everything up to and
# including "path=" (including any text before the feature name, so that
# nothing is dropped when the line is rewritten), $2 the path, $3 the rest.
my $model_line =
    qr/^(.*?(?:PhraseDictionary|LexicalReordering|Generation|OpSequenceModel|KENLM).+ path=)(\S+)(.*)$/;

# find files to cache (and produce new config)
while (my $line = <$old_fh>) {
    # lines starting with '#' are copied verbatim: rewriting them would
    # cache files for disabled features
    if ($line !~ /^\s*#/ && $line =~ $model_line) {
        my ($pre, $path, $post) = ($1, $2, $3);
        my $new_path;
        if ($line =~ /^PhraseDictionaryCompact/) {
            $new_path = cache_file($path, ".minphr", $CAT_MODELS);
        }
        elsif ($line =~ /^PhraseDictionaryBinary/) {
            # a binary phrase table consists of several files that share
            # one prefix; all of them are cached
            foreach my $suffix (".binphr.idx", ".binphr.srctree.wa", ".binphr.srcvoc",
                                ".binphr.tgtdata.wa", ".binphr.tgtvoc") {
                $new_path = cache_file($path, $suffix, $CAT_MODELS);
            }
        }
        elsif ($line =~ /^LexicalReordering/ && -e "$path.minlexr") {
            $new_path = cache_file($path, ".minlexr", $CAT_MODELS);
        }
        elsif ($line =~ /^LexicalReordering/ && -e "$path.binlexr.idx") {
            foreach my $suffix (".binlexr.idx", ".binlexr.srctree", ".binlexr.tgtdata",
                                ".binlexr.voc0", ".binlexr.voc1") {
                $new_path = cache_file($path, $suffix, $CAT_MODELS);
            }
        }
        # some other files may need some more special handling
        # but this works for me right now. feel free to add
        else {
            $new_path = cache_file($path, "", $CAT_MODELS);
        }
        print {$new_fh} "$pre$new_path$post\n" unless $just_update_timestamps;
    }
    else {
        print {$new_fh} $line unless $just_update_timestamps;
    }
}
close($old_fh);

unless ($just_update_timestamps) {
    if (!close($new_fh)) {
        # buffered write errors show up at close(); a truncated config must
        # not survive, or later runs would take it for a finished one
        my $reason = $!;
        unlink($cached_config, $config_lock);
        die("ERROR: could not write cached config '$cached_config': $reason");
    }
    unlink($config_lock);
}

print "$cached_config\n";
# Copy one model file (or directory) into the cache directory.
#
# Arguments:
#   $path      - model path as given in the config, without $suffix
#   $suffix    - suffix appended to $path to get the real file name ("" for
#                none); ".gz" is added when only the gzipped file exists
#   $catModels - if true, the cached file(s) are read once with cat
#
# Returns the cached path without the suffix, or the unchanged $path when
# the file does not exist or could not be copied.
#
# Reads the file-level variable $CACHE_DIR. A "<cached file>.lock" file
# marks a copy in progress; the existence test and the lock creation are
# separate steps, so the lock is advisory only.
sub cache_file {
    my ($path, $suffix, $catModels) = @_;

    # add gzipped extension if that's what it is
    if (! -e "$path$suffix" && -e "$path$suffix.gz") {
        $suffix .= ".gz";
    }
    my $source = "$path$suffix";

    # file does not exist... nothing to do
    if (! -e $source) {
        print STDERR "WARINING: $path$suffix does not exist - cannot be cached by cache-model.perl\n";
        return $path;
    }

    # follow symbolic links (done in Perl, so that the file name never
    # passes through a shell)
    require Cwd;
    my $uniq_path = Cwd::abs_path($source);
    $uniq_path = $source unless defined $uniq_path && length $uniq_path;

    # create cached file name: resolved path without the suffix, with all
    # slashes flattened. The suffix is removed only if the resolved name
    # really ends in it; cutting a fixed number of characters off a link
    # target with a different name could map distinct models to one name.
    my $cached_path = $uniq_path;
    $cached_path =~ s/\Q$suffix\E\z//;
    $cached_path =~ s/\//_/g;
    $cached_path = "$CACHE_DIR/$cached_path";

    my $cached_file = "$cached_path$suffix";
    my $lock_file   = "$cached_file.lock";

    # sleep if another process is copying right now...
    while (-e $lock_file) {
        sleep(10);
    }

    if (-e $cached_file) {
        # done if already there, just update time stamp
        system('touch', '--', $cached_file);
    }
    else {
        # okay, go for it
        system('touch', '--', $lock_file);
        # copy from the resolved path: cp -r given a symbolic link may copy
        # the link itself instead of the data it points to
        my $copy_failed = system('cp', '-r', '--', $uniq_path, $cached_file) != 0;
        # a partial copy would be taken for a complete one by later runs
        system('rm', '-rf', '--', $cached_file) if $copy_failed;
        unlink($lock_file);
        if ($copy_failed) {
            print STDERR "WARNING: could not copy $source to $cached_file - using original file\n";
            return $path;
        }
    }

    if ($catModels) {
        # the glob needs a shell, so the path is single-quoted for it
        (my $quoted = $cached_path) =~ s/'/'\\''/g;
        my $cmd = "cat '$quoted'* > /dev/null";
        print STDERR "Executing: $cmd\n";
        system($cmd);
    }

    return $cached_path;
}
|