File size: 4,472 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
#!/usr/bin/env perl
# $Id$
# given a moses.ini file, prints a copy to stdout but replaces all relative
# paths with absolute paths.
#
# Ondrej Bojar.
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

# Main script: read the moses.ini named on the command line, print it to
# STDOUT, and replace the model file names found in the ttable-file,
# generation-file, lmodel-file and distortion-file sections with absolute
# paths.  Dies as soon as a referenced model file is missing or empty.

# Path of the ini file to rewrite; handed to my_open unchanged.
my $ini = shift;
die "usage: absolutize_moses_model.pl path-to-moses.ini > moses.abs.ini"
  if !defined $ini;

binmode(STDIN,  ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $inih = my_open($ini);

# Name of the most recent [section] header.  Starts as the empty string so
# that the string comparisons below never see undef (which would warn) when
# a digit-leading line appears before any header.
my $section = "";

while (my $line = <$inih>) {
    if ($line =~ /^\[([^\]]*)\]\s*$/) {
        $section = $1;
    }
    elsif ($line =~ /^[0-9]/) {
        # Only lines starting with a digit are treated as model entries.
        $line = _absolutize_entry($section, $line, $ini);
    }
    print $line;
}
close $inih if $ini ne "-";

# Rewrite one digit-leading line of the given section so that its file
# name(s) are absolute.
#   $section - name of the section the line belongs to
#   $line    - the raw line, including its line terminator
#   $ini     - path of the ini file; relative names are resolved against it
# Returns the rewritten line (terminated by "\n"), or $line unchanged when
# the section is not one of the handled ones.  Dies on malformed entries and
# on files that are missing or empty.
sub _absolutize_entry {
    my ($section, $line, $ini) = @_;

    if ($section eq "ttable-file") {
        chomp $line;
        # Four leading fields, then the file specification (which may itself
        # contain spaces, hence the split limit).
        my @fields = split(/ /, $line, 5);
        die "Malformed $section entry (expected 5 space-separated fields): $line"
          if @fields < 5;
        my $fn   = pop @fields;
        my $type = $fields[0];

        if ($type eq '8') {
            # suffix arrays model: <src-corpus> <tgt-corpus> <alignment>.
            my @parts = split(/ /, $fn);
            die "Malformed $section entry (expected three file names): $line"
              if @parts < 3;
            # accept binarized ttables
            my @abs = map { _resolve_existing($_, $ini, "", ".binphr.idx") }
              @parts[0 .. 2];
            return join(" ", @fields, @abs) . "\n";
        }

        # Type 12 must exist as a compact binarized ttable (.minphr); every
        # other type may be the plain file or a binarized ttable.
        my @suffixes = $type eq '12' ? (".minphr") : ("", ".binphr.idx");
        my $abs = _resolve_existing($fn, $ini, @suffixes);
        return join(" ", @fields, $abs) . "\n";
    }

    if (   $section eq "generation-file"
        || $section eq "lmodel-file"
        || $section eq "distortion-file")
    {
        chomp $line;
        # Three leading fields followed by the file name; anything after the
        # fourth field is dropped, as before.
        my @fields = split(/ /, $line, 5);
        die "Malformed $section entry (expected 4 space-separated fields): $line"
          if @fields < 4;
        # accept binarized and compact lexro models for distortion-file
        my @suffixes =
          $section eq "distortion-file"
          ? ("", ".binlexr.idx", ".minlexr")
          : ("");
        my $abs = _resolve_existing($fields[3], $ini, @suffixes);
        return join(" ", @fields[0 .. 2], $abs) . "\n";
    }

    return $line;
}

# Make $fn absolute (relative to the location of $ini) and verify that at
# least one of "$abs$suffix", for the given suffixes, exists with non-zero
# size.  Returns the absolute path without any suffix; dies otherwise.
sub _resolve_existing {
    my ($fn, $ini, @suffixes) = @_;

    # Without this check an empty name would resolve to the ini's directory,
    # and a directory has non-zero size, so the entry would wrongly pass.
    die "Missing file name in model entry (malformed line?)"
      if !defined $fn || $fn eq "";

    my $abs        = ensure_absolute($fn, $ini);
    my @candidates = map { $abs . $_ } @suffixes;
    my $found      = grep { -s $_ } @candidates;
    die "File not found or empty: $fn (searched for "
      . join(" or ", @candidates) . ")"
      if !$found;
    return $abs;
}
# Run an external command via system(), logging it to STDERR.
#   @_ - the command and its arguments, passed to system() unchanged
# Returns true when the command exits with status 0, false when it exits
# with a non-zero status or is killed by a signal.  Exits the whole program
# with status 1 when the command cannot be started at all.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);

    if ($? == -1) {
        print STDERR "Failed to execute: @_\n $!\n";
        exit(1);
    }

    if ($? & 127) {
        # The command goes in as a %s argument, not into the format string,
        # so a '%' inside it cannot corrupt the message.
        printf STDERR "Execution of: %s\n died with signal %d, %s coredump\n",
          "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
        # Explicit failure: without this the sub would return printf's
        # (true) result and the caller would take a killed child for success.
        return 0;
    }

    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return !$exitcode;
}
# Turn $target into an absolute path.
#   $target     - a file name, possibly relative
#   $originfile - the file that mentions $target; a relative $target is
#                 resolved against the directory holding this file, which is
#                 itself resolved against the current working directory
# Returns the resulting path.  Dies when the working directory cannot be
# determined.
sub ensure_absolute {
    my $target     = shift;
    my $originfile = shift;

    my $cwd = `pawd 2> /dev/null`;
    # not everyone has pawd!  Because of the redirection the command runs
    # through the shell, so a missing pawd produces an empty string rather
    # than undef; both cases must trigger the fallback.
    $cwd = `pwd` if !defined $cwd || $cwd !~ /\S/;
    die "Failed to absolutize $target. Failing to get cwd!"
      if !defined $cwd || $cwd !~ /\S/;
    chomp $cwd;
    # The trailing slash marks $cwd as a directory for the helper, which
    # strips everything after the last slash of its second argument.
    $cwd .= "/";

    my $absorigin = ensure_relative_to_origin($originfile, $cwd);
    return ensure_relative_to_origin($target, $absorigin);
}
# Resolve a path against the directory of another file.
#   first argument  - the path to resolve
#   second argument - the file (or directory ending in "/") it is relative to
# A path starting with "/" or "~" is returned unchanged.  Otherwise the part
# of the second argument after its last slash is dropped, the path is
# appended, runs of slashes are squeezed and "./" components are removed.
sub ensure_relative_to_origin {
    my ($path, $origin) = @_;

    # the target path is absolute already
    if ($path =~ /^\/|^~/) {
        return $path;
    }

    # where does the origin reside
    (my $origin_dir = $origin) =~ s/[^\/]*$//;

    my $joined = join("/", $origin_dir, $path);
    $joined =~ s/\/+/\//g;
    $joined =~ s/\/(\.\/)+/\//g;
    return $joined;
}
# Open a possibly compressed text file for reading as UTF-8.
#   $f - file name, or "-" for standard input
# Files ending in .gz / .bz2, or identified as gzip / bzip2 data by file(1),
# are read through zcat / bzcat.  Returns a read handle (the STDIN glob for
# "-").  Dies when the file does not exist or cannot be opened.
sub my_open {
    my $f = shift;
    if ($f eq "-") {
        binmode(STDIN, ":utf8");
        return *STDIN;
    }
    die "Not found: $f" if ! -e $f;

    # Ask file(1) about the content.  The list form of the pipe open
    # bypasses the shell, so quotes or other metacharacters in the name are
    # harmless.  A missing file(1) just leaves the description empty.
    my $ft = "";
    {
        no warnings 'exec';
        if (open(my $probe, '-|', 'file', $f)) {
            local $/;
            my $described = <$probe>;
            $ft = $described if defined $described;
            close $probe;
        }
    }

    my $hdl;
    # file might not recognize some files!
    if ($f =~ /\.gz$/ || $ft =~ /gzip compressed data/) {
        open($hdl, '-|', 'zcat', $f) or die "Can't open 'zcat '$f' |': $!";
    }
    elsif ($f =~ /\.bz2$/ || $ft =~ /bzip2 compressed data/) {
        open($hdl, '-|', 'bzcat', $f) or die "Can't open 'bzcat '$f' |': $!";
    }
    else {
        # Three-argument open: the name is taken literally, so surrounding
        # whitespace or mode characters in it are not misinterpreted.
        open($hdl, '<', $f) or die "Can't open '$f': $!";
    }
    binmode $hdl, ":utf8";
    return $hdl;
}
|