File size: 3,786 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/perl -w
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# utility script for deploying decode (may be within tune) jobs over a cluster
# with NFS-mounted drives. copy all the model files to local disk.

use strict;

# Main script: rewrite a moses.ini so that the model files it references
# point at copies inside a cache directory, copy those files there (see
# cache_file), and print the name of the rewritten config on STDOUT.
#
# Usage: cache-model.perl moses.ini cache-dir [cat-models]
#   moses.ini   - config file to read
#   cache-dir   - directory that receives the cached config and model files
#   cat-models  - optional; if true, every cached model file is read once
#                 with `cat` after it has been cached

my $CAT_MODELS = 0;

die("ERROR: syntax is cache-model.perl moses.ini cache-dir") 
  unless scalar @ARGV >= 2;
my $CONFIG = $ARGV[0];
my $CACHE_DIR = $ARGV[1];
if (scalar(@ARGV) == 3) {
    $CAT_MODELS = $ARGV[2];
}

# open the original config BEFORE any lock file is created: if the config
# is missing we die here, and must not leave a stale lock behind that would
# make every later invocation wait forever
open(my $old_fh, '<', $CONFIG) || die("ERROR: could not open config '$CONFIG'");

# create dir (if not already there); list form keeps the directory name
# away from the shell
system('mkdir', '-p', '--', $CACHE_DIR);

# name for new config file: the config path with every "/" turned into "_"
my $cached_config = $CONFIG;
$cached_config =~ s/\//_/g;
$cached_config = "$CACHE_DIR/$cached_config";

# wait while another process is writing this cached config
while (-e "$cached_config.lock") {
  sleep(10);
}

# if the cached config already exists it is not rewritten; the loop below
# then only runs cache_file on every model file (which refreshes the time
# stamps of files that are already cached)
my $just_update_timestamps = (-e $cached_config);

# $new_fh stays undefined when we only update time stamps
my $new_fh;
unless ($just_update_timestamps) {
  open(my $lock_fh, '>', "$cached_config.lock")
    || die("ERROR: could not create lock '$cached_config.lock': $!");
  close($lock_fh);
  unless (open($new_fh, '>', $cached_config)) {
    my $open_error = $!;
    unlink("$cached_config.lock");
    die("ERROR: could not write cached config '$cached_config': $open_error");
  }
}

# find files to cache (and produce new config)
while (my $line = <$old_fh>) {
  if ($line =~ /(PhraseDictionary.+ path=)(\S+)(.*)$/ ||
      $line =~ /(LexicalReordering.+ path=)(\S+)(.*)$/ ||
      $line =~ /(Generation.+ path=)(\S+)(.*)$/ ||
      $line =~ /(OpSequenceModel.+ path=)(\S+)(.*)$/ ||
      $line =~ /(KENLM.+ path=)(\S+)(.*)$/) {
    # copy the captures right away, the matches below overwrite $1..$3
    my ($pre,$path,$post) = ($1,$2,$3);
    my $new_path;
    if ($line =~ /^PhraseDictionaryCompact/) {
      $new_path = cache_file($path,".minphr", $CAT_MODELS);
    }
    elsif ($line =~ /^PhraseDictionaryBinary/) {
      # several files share one stem; the value of the last call is kept
      foreach my $suffix (".binphr.idx",".binphr.srctree.wa",".binphr.srcvoc",".binphr.tgtdata.wa",".binphr.tgtvoc") {
        $new_path = cache_file($path,$suffix, $CAT_MODELS);
      }
    }
    elsif ($line =~ /^LexicalReordering/ && -e "$path.minlexr") {
      $new_path = cache_file($path,".minlexr", $CAT_MODELS);
    }
    elsif ($line =~ /^LexicalReordering/ && -e "$path.binlexr.idx") {
      foreach my $suffix (".binlexr.idx",".binlexr.srctree",".binlexr.tgtdata",".binlexr.voc0",".binlexr.voc1") {
        $new_path = cache_file($path,$suffix, $CAT_MODELS);
      }
    }
    # some other files may need some more special handling
    # but this works for me right now. feel free to add
    else {
      $new_path = cache_file($path,"", $CAT_MODELS);
    }
    print {$new_fh} "$pre$new_path$post\n" if $new_fh;
  }
  else {
    print {$new_fh} $line if $new_fh;
  }
}
close($old_fh);

if ($new_fh) {
  # buffered write errors (e.g. disk full) only show up at close time
  my $closed_ok = close($new_fh);
  my $close_error = $!;
  unless ($closed_ok) {
    # do not leave a truncated config that later runs would take as complete
    unlink($cached_config);
    unlink("$cached_config.lock");
    die("ERROR: could not finish writing '$cached_config': $close_error");
  }
  unlink("$cached_config.lock");
}
print "$cached_config\n";

# Copy one model file (or directory) into $CACHE_DIR and return where the
# cached copy lives.
#
# Arguments:
#   $path      - model path as written in the config, without $suffix
#   $suffix    - suffix appended to $path to get the real file name ("" for
#                none); ".gz" is added when only a gzipped file exists
#   $catModels - if true, all files starting with the cached path are read
#                once with `cat` (output discarded)
#
# Returns the cached path WITHOUT the suffix. Returns the original $path
# unchanged when the file does not exist, its real location cannot be
# resolved, or the copy fails.
#
# The cached file name is the resolved (symlink-free) path with every "/"
# replaced by "_". External commands are run in list form so that paths
# containing spaces or shell metacharacters are passed through untouched.
sub cache_file {
  my ($path,$suffix, $catModels) = @_;

  # add gzipped extension if that's what it is 
  if (! -e "$path$suffix" && -e "$path$suffix.gz") {
    $suffix .= ".gz";
  }

  # file does not exist... nothing to do
  if (! -e "$path$suffix") {
    print STDERR "WARINING: $path$suffix does not exist - cannot be cached by cache-model.perl\n";
    return $path;
  }

  # follow symbolic link (list-form pipe open: no shell involved)
  my $uniq_path = '';
  if (open(my $readlink_fh, '-|', 'readlink', '-f', '--', "$path$suffix")) {
    my $output = do { local $/; <$readlink_fh> };
    $uniq_path = $output if defined $output;
    close($readlink_fh);
  }
  # strip only a trailing newline (chop would eat any last character)
  chomp($uniq_path);

  # without a resolved path there is no usable cache file name
  if ($uniq_path eq '') {
    print STDERR "WARNING: could not resolve $path$suffix - cannot be cached by cache-model.perl\n";
    return $path;
  }

  # create cached file name
  # (assumes the resolved name ends in the same suffix - it is cut off by length)
  my $cached_path = substr($uniq_path,0,length($uniq_path)-length($suffix));
  $cached_path =~ s/\//_/g;
  $cached_path = "$CACHE_DIR/$cached_path";

  # sleep if another process is copying right now...
  while(-e "$cached_path$suffix.lock") {
    sleep(10);
  }
  # done if already there
  if (-e "$cached_path$suffix") {
    system('touch', '--', "$cached_path$suffix"); # update time stamp
  }
  else {
    # okay, go for it
    system('touch', '--', "$cached_path$suffix.lock");
    my $copy_status = system('cp', '-r', '--', "$path$suffix", "$cached_path$suffix");
    if ($copy_status != 0) {
      # remove the partial copy so that later runs do not mistake it for a
      # complete cached file, release the lock, and fall back to the original
      system('rm', '-rf', '--', "$cached_path$suffix");
      system('rm', '-f', '--', "$cached_path$suffix.lock");
      print STDERR "WARNING: copying $path$suffix to $cached_path$suffix failed - using original file\n";
      return $path;
    }
    system('rm', '--', "$cached_path$suffix.lock");
  }

  if ($catModels) {
      # deliberately run through the shell: relies on glob expansion and
      # output redirection
      my $cmd = "cat $cached_path* > /dev/null";
      print STDERR "Executing: $cmd\n";
      system($cmd);
  }
  return $cached_path;
}