|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use warnings; |
|
use strict; |
|
use Getopt::Long; |
|
|
|
# All three standard streams carry UTF-8 text.
for my $stream (\*STDIN, \*STDOUT, \*STDERR) {
    binmode($stream, ":utf8");
}

# Command-line settings and their defaults.
my $filelist;                       # --filelist/--fl: file naming the inputs
my $ignore_final_state_cost = 0;    # --ignore-final-state-cost: boolean flag
my $mangle_weights;                 # --mangle-weights: weight transformation

my @option_spec = (
    "ignore-final-state-cost" => \$ignore_final_state_cost,
    "filelist|fl=s"           => \$filelist,
    "mangle-weights=s"        => \$mangle_weights,
);

# Unknown or malformed options terminate the script with status 1.
GetOptions(@option_spec) or exit 1;
|
|
|
# Assemble the list of inputs: first the names read from --filelist (one per
# line), then whatever remains on the command line. With no input at all,
# fall back to standard input, which my_open() recognises as "-".
my @infiles;
if (defined $filelist) {
    my $list_fh = my_open($filelist);
    chomp(my @listed = <$list_fh>);
    close $list_fh;
    push @infiles, @listed;
}

# Take over the remaining arguments and leave @ARGV empty.
push @infiles, splice(@ARGV);

unless (@infiles) {
    print STDERR "Reading input from stdin\n";
    push @infiles, "-";
}
|
|
|
# Convert every lattice of every input and print one lattice per output line.
#
# Input:  lines of "src tgt label weight" (whitespace separated); a line
#         without label and weight declares "src" a final state, with "tgt",
#         if present, being its cost. An empty line ends a lattice.
# Output: "(" + one "(...)," group per source node + ")"; each group holds
#         "('label',weight,step)," entries where step is the distance from
#         the source to the target in the renumbered (dense) node ids.
#
# Dies on malformed node indices, on arcs that do not go forward, on final
# state costs (unless --ignore-final-state-cost) and on unknown
# --mangle-weights values. Final nodes with outgoing edges are reported on
# STDERR and make the script die after all inputs have been processed.
my $err = 0;
foreach my $inf (@infiles) {
    my $nr = 0;    # input line counter, running across all lattices of $inf

    # Open the input exactly once. Opening it again for each lattice would
    # restart a regular or compressed file at its first line, so a file
    # holding several lattices would never be read past the first one.
    my $fh = my_open($inf);

    LATTICE:
    while (1) {
        my %usedids    = ();    # node ids seen as the source of an arc
        my %usedtgtids = ();    # node ids seen as the target of an arc
        my @outnodes   = ();    # $outnodes[$src] = list of [label, weight, tgt]
        my %is_final;           # node ids declared final in this lattice

        ARC:
        while (my $line = <$fh>) {
            chomp $line;
            $nr++;
            last ARC if $line eq "";    # empty line: end of this lattice

            my ($src, $tgt, $label, $weight) = split /\s+/, $line;
            die "$inf:$nr:Bad src node index: $src" if $src !~ /^[0-9]+$/;

            if (!defined $label && !defined $weight) {
                # Final-state line: $src is the final node and $tgt, when
                # present, is its cost. The node has to be stored here, or
                # the outgoing-edges check below has nothing to inspect.
                $is_final{$src} = 1;

                die "$inf:$nr:Final state $src has cost $tgt. Unsupported, use --ignore-final-state-cost"
                    if defined $tgt && !$ignore_final_state_cost;

                next ARC;
            }
            $weight = 0 if !defined $weight;

            $usedids{$src}    = 1;
            $usedtgtids{$tgt} = 1;

            if (defined $mangle_weights) {
                if ($mangle_weights eq "expneg") {
                    # Replace every comma-separated weight w by exp(-w).
                    $weight = join(",", map { exp(-$_) } split /,/, $weight);
                } else {
                    die "Bad weights mangling: $mangle_weights";
                }
            }

            # Arcs must lead to a strictly higher node id.
            my $targetnode = $tgt - $src;
            die "$inf:$nr:Not topologically sorted, got arc from $src to $tgt"
                if $targetnode <= 0;
            push @{ $outnodes[$src] }, [ $label, $weight, $tgt ];
        }

        # Renumber the nodes densely: sources get 0, 1, 2, ... in numeric
        # order of their original ids.
        my %denseids = ();
        my $nextid   = 0;
        foreach my $id (sort { $a <=> $b } keys %usedids) {
            $denseids{$id} = $nextid;
            $nextid++;
        }

        # Nodes that only ever appear as targets all receive the same id,
        # the first one after the sources; the counter is not advanced here.
        foreach my $id (keys %usedtgtids) {
            next if defined $denseids{$id};
            $denseids{$id} = $nextid;
        }

        foreach my $f (keys %is_final) {
            if (defined $outnodes[$f]) {
                print STDERR "$inf:Node $f is final but it has outgoing edges!\n";
                $err = 1;
            }
        }

        print "(";
        for (my $origsrc = 0; $origsrc < @outnodes; $origsrc++) {
            my $src = $denseids{$origsrc};
            next if !defined $src;       # index never used as a node
            next if $src == $nextid;     # target-only node: emits no group
            my $outnode = $outnodes[$origsrc];
            print "(";
            foreach my $arc (@$outnode) {
                my $origtgt = $arc->[2];
                my $tgt     = $denseids{$origtgt};
                if (!defined $tgt) {
                    # Target without a dense id yet: give it a fresh one.
                    $tgt = $denseids{$origtgt} = $nextid;
                    $nextid++;
                }
                my $step_to_target = $tgt - $src;
                die "$inf:Bug, I damaged top-sortedness (orig $origsrc .. $origtgt; curr $src .. $tgt)." if $step_to_target <= 0;
                print "('" . apo($arc->[0]) . "',$arc->[1],$step_to_target),";
            }
            print "),";
        }
        print ")\n";

        # Another lattice follows only if the input has data left.
        last LATTICE if eof($fh);
    }
    close $fh;
}
die "There were errors." if $err;
|
|
|
# Escape a label for use inside a single-quoted string: every backslash and
# every apostrophe is prefixed with a backslash.
#   $text   - the string to escape
#   returns - the escaped copy; the argument itself is not modified
sub apo {
    my ($text) = @_;

    # One pass covers both characters; escaping never creates new matches.
    $text =~ s{([\\'])}{\\$1}g;
    return $text;
}
|
|
|
# Open an input for reading as UTF-8 text and return the file handle.
#   $f      - file name, or "-" for standard input
#   returns - STDIN for "-", otherwise a handle on the file; gzip and bzip2
#             data (recognised by the .gz/.bz2 suffix or by the output of
#             file(1)) is read through zcat or bzcat
#   dies    - if the file does not exist or cannot be opened
#
# The name is never handed to a shell and never parsed as an open() mode, so
# names containing apostrophes, surrounding blanks or characters such as
# '>' and '|' are opened literally.
sub my_open {
    my $f = shift;
    if ($f eq "-") {
        binmode(STDIN, ":utf8");
        return *STDIN;
    }

    die "Not found: $f" if !-e $f;

    # Ask file(1) what the content looks like. The list form of the pipe
    # open runs the command without a shell. If file(1) cannot be run, the
    # description stays empty and only the suffix decides.
    my $ft = "";
    if (open my $probe, "-|", "file", "--", $f) {
        local $/;
        my $described = <$probe>;
        $ft = $described if defined $described;
        close $probe;
    }

    my $opn;    # what is being opened, as shown in the error message
    my $hdl;
    my $opened;
    if ($f =~ /\.gz$/ || $ft =~ /gzip compressed data/) {
        $opn    = "zcat '$f' |";
        $opened = open($hdl, "-|", "zcat", "--", $f);
    } elsif ($f =~ /\.bz2$/ || $ft =~ /bzip2 compressed data/) {
        $opn    = "bzcat '$f' |";
        $opened = open($hdl, "-|", "bzcat", "--", $f);
    } else {
        $opn    = "$f";
        $opened = open($hdl, "<", $f);
    }
    die "Can't open '$opn': $!" if !$opened;
    binmode $hdl, ":utf8";
    return $hdl;
}
|
|