|
|
|
|
|
use strict; |
|
|
|
use Getopt::Long; |
|
use File::Basename; |
|
use FindBin qw($RealBin); |
|
|
|
sub systemCheck($); |
|
|
|
my $mosesDir = "$RealBin/../.."; |
|
my $ptPath; |
|
my $lexRoPath; |
|
my $outPath; |
|
my $numScores = 4; |
|
my $numLexScores; |
|
my $pruneNum = 100; |
|
my $scfg = 0; |
|
|
|
GetOptions("phrase-table=s" => \$ptPath, |
|
"lex-ro=s" => \$lexRoPath, |
|
"output-dir=s" => \$outPath, |
|
"num-scores=s" => \$numScores, |
|
"num-lex-scores=i" => \$numLexScores, |
|
"prune=i" => \$pruneNum, |
|
"scfg" => \$scfg |
|
) or exit 1; |
|
|
|
|
|
die("ERROR: please set --phrase-table") unless defined($ptPath); |
|
|
|
die("ERROR: please set --output-dir") unless defined($outPath); |
|
|
|
die("ERROR: compile contrib/sigtest-filter") if (!-X "$mosesDir/contrib/sigtest-filter/filter-pt"); |
|
die("ERROR: compile with bjam --with-cmph") if (!-X "$mosesDir/bin/processLexicalTableMin"); |
|
die("ERROR: compile with bjam --with-xmlrpc-c") if (!-X "$mosesDir/bin/CreateProbingPT"); |
|
|
|
my $cmd; |
|
|
|
my $tempPath = dirname($outPath) ."/tmp.$$"; |
|
`mkdir -p $tempPath`; |
|
|
|
$cmd = "gzip -dc $ptPath | $mosesDir/contrib/sigtest-filter/filter-pt -n $pruneNum | gzip -c > $tempPath/pt.gz"; |
|
systemCheck($cmd); |
|
|
|
if (defined($lexRoPath)) { |
|
die("ERROR: please set --num-lex-scores") unless defined($numLexScores); |
|
|
|
$cmd = "$mosesDir/bin/processLexicalTableMin -in $lexRoPath -out $tempPath/lex-ro -T . -threads all"; |
|
systemCheck($cmd); |
|
|
|
$cmd = "$mosesDir/bin/addLexROtoPT $tempPath/pt.gz $tempPath/lex-ro.minlexr | gzip -c > $tempPath/pt.withLexRO.gz"; |
|
systemCheck($cmd); |
|
|
|
$cmd = "ln -s pt.withLexRO.gz $tempPath/pt.txt.gz"; |
|
systemCheck($cmd); |
|
} |
|
else { |
|
$cmd = "ln -s pt.gz $tempPath/pt.txt.gz"; |
|
systemCheck($cmd); |
|
} |
|
|
|
$cmd = "$mosesDir/bin/CreateProbingPT2 --num-scores $numScores --log-prob --input-pt $tempPath/pt.txt.gz --output-dir $outPath"; |
|
|
|
if (defined($lexRoPath)) { |
|
$cmd .= " --num-lex-scores $numLexScores"; |
|
} |
|
|
|
if ($scfg) { |
|
$cmd .= " --scfg"; |
|
} |
|
|
|
systemCheck($cmd); |
|
|
|
exit(0); |
|
|
|
|
|
sub systemCheck($) |
|
{ |
|
my $cmd = shift; |
|
print STDERR "Executing: $cmd\n"; |
|
|
|
my $retVal = system($cmd); |
|
if ($retVal != 0) |
|
{ |
|
exit(1); |
|
} |
|
} |
|
|