|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use strict;

# Optional third command-line argument. It is handed to cache_file() for every
# model path; when true, the cached files are additionally read once with
# `cat`, discarding the output.
my $CAT_MODELS = 0;

# Usage: cache-model.perl moses.ini cache-dir [cat-models]
die("ERROR: syntax is cache-model.perl moses.ini cache-dir [cat-models]")
    unless scalar(@ARGV) >= 2;

# Configuration file to rewrite and the directory that receives the copies.
my $CONFIG    = $ARGV[0];
my $CACHE_DIR = $ARGV[1];

# Honour the flag whenever it is supplied. Testing for exactly three
# arguments would silently ignore it as soon as anything else follows.
$CAT_MODELS = $ARGV[2] if scalar(@ARGV) >= 3;
|
|
|
|
|
# Make sure the cache directory exists. The list form of system() bypasses
# the shell, so directory names containing spaces or shell metacharacters
# are passed to mkdir unchanged.
system('mkdir', '-p', $CACHE_DIR);
die("ERROR: could not create cache directory '$CACHE_DIR'")
    unless -d $CACHE_DIR;

# The cached copy of the configuration lives directly inside the cache
# directory; its file name is the original config path with every '/'
# replaced by '_'.
my $cached_config = $CONFIG;
$cached_config =~ tr{/}{_};
$cached_config = "$CACHE_DIR/$cached_config";
|
|
|
|
|
# Rewrite the configuration so that every model path points into the cache,
# copying the model files on the way (see cache_file). If a cached
# configuration already exists, nothing is written: the model files are only
# passed through cache_file again, which refreshes their timestamps.
# Finally the name of the cached configuration is printed to STDOUT.

# Open the source configuration before any lock is taken, so that a missing
# or unreadable config cannot leave a stale lock file behind.
open(my $old_fh, '<', $CONFIG)
    or die("ERROR: could not open config '$CONFIG': $!");

# Wait while another process holds the lock on the cached configuration.
# NOTE: the existence check and the lock creation below are two separate
# steps, so this is a cooperative lock, not an atomic one.
my $lock_file = "$cached_config.lock";
while (-e $lock_file) {
    sleep(10);
}

my $just_update_timestamps = (-e $cached_config);

my $new_fh;
unless ($just_update_timestamps) {
    # Create the (empty) lock file.
    open(my $lock_fh, '>', $lock_file)
        or die("ERROR: could not create lock '$lock_file': $!");
    close($lock_fh);

    # Release the lock again if the cached config cannot be created.
    unless (open($new_fh, '>', $cached_config)) {
        my $reason = $!;
        unlink($lock_file);
        die("ERROR: could not write cached config '$cached_config': $reason");
    }
}

# A line that names one of the cacheable feature types and carries a
# " path=" argument. $1 is everything up to and including "path=" (with any
# text that precedes the feature name, so it is kept in the rewritten line),
# $2 is the path itself, $3 is the remainder of the line.
my $model_line = qr/^(.*?(?:PhraseDictionary|LexicalReordering|Generation|OpSequenceModel|KENLM).+ path=)(\S+)(.*)$/;

while (my $line = <$old_fh>) {
    if ($line =~ $model_line) {
        my ($pre, $path, $post) = ($1, $2, $3);
        my $new_path;

        if ($line =~ /^PhraseDictionaryCompact/) {
            $new_path = cache_file($path, ".minphr", $CAT_MODELS);
        }
        elsif ($line =~ /^PhraseDictionaryBinary/) {
            # The path is a common prefix of several files; cache each one.
            foreach my $suffix (".binphr.idx", ".binphr.srctree.wa", ".binphr.srcvoc",
                                ".binphr.tgtdata.wa", ".binphr.tgtvoc") {
                $new_path = cache_file($path, $suffix, $CAT_MODELS);
            }
        }
        elsif ($line =~ /^LexicalReordering/ && -e "$path.minlexr") {
            $new_path = cache_file($path, ".minlexr", $CAT_MODELS);
        }
        elsif ($line =~ /^LexicalReordering/ && -e "$path.binlexr.idx") {
            # As above: one prefix, several files.
            foreach my $suffix (".binlexr.idx", ".binlexr.srctree", ".binlexr.tgtdata",
                                ".binlexr.voc0", ".binlexr.voc1") {
                $new_path = cache_file($path, $suffix, $CAT_MODELS);
            }
        }
        else {
            # Anything else: the path names the file itself.
            $new_path = cache_file($path, "", $CAT_MODELS);
        }

        print {$new_fh} "$pre$new_path$post\n" unless $just_update_timestamps;
    }
    else {
        # Lines without a cacheable model path are copied verbatim.
        print {$new_fh} $line unless $just_update_timestamps;
    }
}
close($old_fh);

unless ($just_update_timestamps) {
    # Buffered write errors surface at close(). Do not leave a truncated
    # cached config behind, because later runs would treat it as complete.
    my $write_ok = close($new_fh);
    my $reason   = $!;
    unlink($cached_config) unless $write_ok;
    unlink($lock_file);
    die("ERROR: could not write cached config '$cached_config': $reason")
        unless $write_ok;
}

print "$cached_config\n";
|
|
|
# Copy one model file into the cache directory and return its cached location.
#
# Parameters:
#   $path      - model path as written in the configuration
#   $suffix    - extension appended to $path to obtain the real file name
#                (may be the empty string)
#   $catModels - when true, read all cached files that start with the
#                returned prefix once with `cat`, discarding the output
#
# Returns the cached path WITHOUT $suffix, i.e. the value to put into the
# rewritten configuration. If the file does not exist, cannot be resolved or
# cannot be copied, a warning is printed to STDERR and the original $path is
# returned instead.
#
# Reads the file-level variable $CACHE_DIR.
sub cache_file {
    my ($path, $suffix, $catModels) = @_;

    # Fall back to the gzipped variant when only that one exists.
    if (! -e "$path$suffix" && -e "$path$suffix.gz") {
        $suffix .= ".gz";
    }
    my $source = "$path$suffix";

    if (! -e $source) {
        print STDERR "WARNING: $source does not exist - cannot be cached by cache-model.perl\n";
        return $path;
    }

    # Resolve the file to its canonical name via `readlink -f`. The list form
    # of the pipe open bypasses the shell, so odd characters in the path are
    # harmless.
    my $uniq_path = '';
    if (open(my $readlink_fh, '-|', 'readlink', '-f', $source)) {
        my $output = do { local $/; <$readlink_fh> };
        $uniq_path = $output if defined $output;
        close($readlink_fh);
    }
    chomp($uniq_path);
    if ($uniq_path eq '') {
        print STDERR "WARNING: could not resolve $source - cannot be cached by cache-model.perl\n";
        return $path;
    }

    # Name inside the cache: canonical path minus the suffix, with every '/'
    # replaced by '_'.
    my $cached_path = substr($uniq_path, 0, length($uniq_path) - length($suffix));
    $cached_path =~ s/\//_/g;
    $cached_path = "$CACHE_DIR/$cached_path";

    my $target    = "$cached_path$suffix";
    my $lock_file = "$target.lock";

    # Wait while another process is copying the same file.
    while (-e $lock_file) {
        sleep(10);
    }

    if (-e $target) {
        # Already cached: just refresh the timestamp.
        system('touch', $target);
    }
    else {
        # Copy under a lock file.
        if (open(my $lock_fh, '>', $lock_file)) {
            close($lock_fh);
        }
        else {
            print STDERR "WARNING: could not create lock $lock_file: $!\n";
        }

        my $copy_status = system('cp', '-r', $source, $target);
        if ($copy_status != 0) {
            # Remove whatever was copied so far; otherwise the next run would
            # find the partial file and treat it as a valid cache entry.
            system('rm', '-rf', $target);
            unlink($lock_file);
            print STDERR "WARNING: could not copy $source to $target - cannot be cached by cache-model.perl\n";
            return $path;
        }
        unlink($lock_file);
    }

    if ($catModels) {
        # The trailing '*' must be expanded by the shell, so this command does
        # go through it; the prefix is single-quoted to keep it intact.
        (my $quoted = $cached_path) =~ s/'/'\\''/g;
        my $cmd = "cat '$quoted'* > /dev/null";
        print STDERR "Executing: $cmd\n";
        system($cmd);
    }

    return $cached_path;
}
|
|
|
|