|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use warnings; |
|
use strict; |
|
|
|
# Compile-time setup: add the perllib tree under the current working
# directory to @INC so the modules loaded below can be found there.
BEGIN {
    # Use pawd's output when it prints something usable; otherwise ask pwd.
    my $cwd = `pawd 2>/dev/null` || `pwd`;
    chomp $cwd;
    push @INC, "$cwd/perllib/sun4-solaris";
}
|
use lib "perllib/sun4-solaris/auto/GD"; |
|
use GD; |
|
use GD::Image; |
|
use GD::Polygon; |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Read the decoder trace named on the command line.
#
# Layout of @sentenceData (each slot is indexed by sentence number):
#   [0]  list of phrase records; every record is a hash with the keys
#        srcStart, srcEnd, srcText, srcNumChars,
#        tgtText, tgtNumChars, tgtStart, tgtEnd
#   [1]  list of dropped words, each stored as an array of its factors
#   [2]  summed source-phrase length in words
#   [3]  allocated but not filled in by this section
#
# srcText / tgtText are arrays of words, each word an array of its
# '|'-separated factors.  A "<EPSILON>" translation keeps its literal text in
# tgtText and tgtNumChars, but adds no target words and is not counted as a
# target phrase.
# ---------------------------------------------------------------------------
my $infilename = shift @ARGV;
die "usage: $0 <decoder-trace-file>\n" unless defined $infilename;
open(my $input, '<', $infilename) or die "couldn't open '$infilename' for read: $!\n";

my @sentenceData = ([], [], [], []);
my $curSentence = -1;

# Per-sentence totals, used to size each image.
my @numSrcChars = ();
my @numTgtChars = ();
my @numSrcPhrases = ();
my @numTgtPhrases = ();

# Running totals for the sentence currently being read.
my ($sentenceSrcChars, $sentenceTgtChars, $sentenceSrcPhrases, $sentenceTgtPhrases, $tgtWords);

# Which section of the trace we are inside: none / opts / spans / drops.
my $mode = 'none';

while (my $line = <$input>) {
    # A blank line ends whatever section was being read.
    $mode = 'none' if $line =~ /^\s*\n$/;

    if ($line =~ /TRANSLATION HYPOTHESIS DETAILS:/) {
        $mode = 'opts';

        # Bank the totals of the sentence that just finished.
        if ($curSentence > -1) {
            push @numSrcChars, $sentenceSrcChars;
            push @numTgtChars, $sentenceTgtChars;
            push @numSrcPhrases, $sentenceSrcPhrases;
            push @numTgtPhrases, $sentenceTgtPhrases;
        }

        $curSentence++;
        ($sentenceSrcChars, $sentenceTgtChars) = (0, 0);
        ($sentenceSrcPhrases, $sentenceTgtPhrases) = (0, 0);
        $tgtWords = 0;
        push @{$sentenceData[0]}, [];
        push @{$sentenceData[1]}, [];
        push @{$sentenceData[2]}, 0;
    }
    elsif ($line =~ /SOURCE\/TARGET SPANS:/) {
        $mode = 'spans';
    }
    elsif ($line =~ /WORDS\/PHRASES DROPPED:/) {
        $mode = 'drops';
    }
    elsif ($mode eq 'opts') {
        # Copy the captures into lexicals immediately so that nothing below
        # depends on $1..$3 surviving later statements.
        my ($srcStart, $srcEnd, $srcString) =
            $line =~ /SOURCE:\s+\[(\d+)\.\.(\d+)\]\s+(\S(?:.*\S)?)\s*$/
            or die "can't parse translation-options info for sentence $curSentence";

        my @srcFactors = map { [ split(/\|/, $_) ] } split(/\s+/, $srcString);
        my %details = (
            'srcStart' => $srcStart,
            'srcEnd'   => $srcEnd,
            'srcText'  => \@srcFactors,
        );
        $sentenceData[2]->[$curSentence] += $srcEnd - $srcStart + 1;

        # Display width in characters: the widest factor of every word plus
        # one separator per word, minus the separator after the last word.
        my $srcNumChars = 0;
        foreach my $word (@srcFactors) {
            $srcNumChars += maxN(map { length($_) } @$word) + 1;
        }
        $details{'srcNumChars'} = $srcNumChars - 1;
        $sentenceSrcChars += $details{'srcNumChars'};
        $sentenceSrcPhrases++;

        # The translation is expected on the very next line.
        my $tgtLine = <$input>;
        die "unexpected end of file after SOURCE line for sentence $curSentence\n"
            unless defined $tgtLine;
        my ($tgtString) = $tgtLine =~ /TRANSLATED AS:\s+(\S(?:.*\S)?)\s*$/
            or die "can't parse translation-options info for sentence $curSentence";

        my @words = split(/\s+/, $tgtString);
        if ($words[0] eq '<EPSILON>') {
            @words = ();              # nothing was emitted for this phrase
        }
        else {
            $sentenceTgtPhrases++;
        }

        my @tgtFactors = map { [ split(/\|/, $_) ] } split(/\s+/, $tgtString);
        $details{'tgtText'} = \@tgtFactors;

        my $tgtNumChars = 0;
        foreach my $word (@tgtFactors) {
            $tgtNumChars += maxN(map { length($_) } @$word) + 1;
        }
        $details{'tgtNumChars'} = $tgtNumChars - 1;
        $sentenceTgtChars += $details{'tgtNumChars'};

        # Target word span, counted in the order the phrases are listed.
        $details{'tgtStart'} = $tgtWords;
        $tgtWords += scalar(@words);
        $details{'tgtEnd'} = $tgtWords - 1;

        push @{$sentenceData[0]->[$curSentence]}, \%details;
    }
    elsif ($mode eq 'drops') {
        # (\S(?:.*\S)?) also accepts a dropped word that is one character long.
        my ($dropped) = $line =~ /^\s*(\S(?:.*\S)?)\s*$/
            or die "can't parse dropped-words info for sentence $curSentence\n";
        push @{$sentenceData[1]->[$curSentence]}, [ split(/\|/, $dropped) ];
    }
}
close($input);

# Bank the totals of the final sentence, if the file contained any sentence.
if ($curSentence > -1) {
    push @numSrcChars, $sentenceSrcChars;
    push @numTgtChars, $sentenceTgtChars;
    push @numSrcPhrases, $sentenceSrcPhrases;
    push @numTgtPhrases, $sentenceTgtPhrases;
}
|
|
|
|
|
# Add up the corpus-wide totals and turn each sentence's summed phrase length
# (slot 2 of @sentenceData) into that sentence's average phrase length.
my ($totalPhraseLength, $totalNumPhrases) = (0, 0);
for my $i (0 .. $#{$sentenceData[0]}) {
    my $phraseCount = scalar(@{$sentenceData[0]->[$i]});
    $totalPhraseLength += $sentenceData[2]->[$i];
    $totalNumPhrases += $phraseCount;

    # A sentence without any phrase would otherwise abort the whole run with
    # a division by zero; report its average as 0 instead.
    $sentenceData[2]->[$i] = $phraseCount ? $sentenceData[2]->[$i] / $phraseCount : 0;
}
|
|
|
|
|
# ---------------------------------------------------------------------------
# Output directory and layout constants shared by all sentence images.
# ---------------------------------------------------------------------------

# The factor counts are read from the first phrase that exists.  Looking it up
# explicitly avoids autovivifying an empty sentence when nothing was parsed,
# and reports the problem before any directory is created.
my ($srcNumFactors, $tgtNumFactors) = do {
    my $firstPhrase;
    foreach my $phrases (@{$sentenceData[0]}) {
        next unless @$phrases;
        $firstPhrase = $phrases->[0];
        last;
    }
    die "no translation options found in '$infilename'\n" unless defined $firstPhrase;
    (scalar(@{$firstPhrase->{'srcText'}->[0]}), scalar(@{$firstPhrase->{'tgtText'}->[0]}));
};

# Images go into a directory named after the input file's last path component
# plus a timestamp.  A name with no directory part is accepted as-is; only a
# name with nothing after its final slash is rejected.
my $imgdir = do {
    my ($basename) = $infilename =~ m{([^/]+)\z}
        or die "infilename ends in slash! should not be a directory\n";
    "phraseImgs-tmp/${basename}_" . time;
};

# List-form system() keeps the file-derived name away from the shell.
system('mkdir', '-p', $imgdir) == 0
    or die "couldn't create image directory '$imgdir'\n";

my $font = gdLargeFont;
my ($topMargin, $bottomMargin, $leftMargin, $rightMargin) = (1, 1, 1, 1);
my $phraseEdgeHSpace = int($font->width / 2);    # padding left and right of a phrase's text
my $phraseEdgeVSpace = 1;                        # padding above and below each text row
my $middleVSpace = $font->height + 6;            # gap between the source and target bands

# Y coordinate of every factor row: source rows first, then, below the middle
# gap, the target rows.
my $srcY = $topMargin + $phraseEdgeVSpace;
my @srcFactorYs = map { $srcY + ($font->height + $phraseEdgeVSpace) * $_ } 0 .. $srcNumFactors - 1;
my $tgtY = $srcY + ($font->height + $phraseEdgeVSpace) * $srcNumFactors + $middleVSpace + $phraseEdgeVSpace;
my @tgtFactorYs = map { $tgtY + ($font->height + $phraseEdgeVSpace) * $_ } 0 .. $tgtNumFactors - 1;
|
|
|
# ---------------------------------------------------------------------------
# Draw one PNG per sentence: a band of source phrases on top, a band of target
# phrases below, and a line joining each source phrase to its translation.
# Phrases translated as <EPSILON> get no target box; their source text is
# written in the highlight colour instead.
# ---------------------------------------------------------------------------
for my $i (0 .. $#{$sentenceData[0]}) {
    # Phrase records of this sentence, in the order they were read (which is
    # the order in which their target words were counted).
    my $phrases = $sentenceData[0]->[$i];

    # Wide enough for the longer of the two bands, tall enough for all rows.
    my $img = GD::Image->new(
        $leftMargin + $rightMargin + max(
            $font->width * $numSrcChars[$i] + $numSrcPhrases[$i] * 2 * $phraseEdgeHSpace,
            $font->width * $numTgtChars[$i] + $numTgtPhrases[$i] * 2 * $phraseEdgeHSpace),
        $topMargin + $bottomMargin + $middleVSpace
            + ($font->height + $phraseEdgeVSpace) * ($srcNumFactors + $tgtNumFactors)
            + 2 * $phraseEdgeVSpace);

    my $white = $img->colorAllocate(255, 255, 255);
    $img->transparent($white);
    my $black = $img->colorAllocate(0, 0, 0);
    my $highlightCol = $img->colorAllocate(255, 0, 0);
    my @bgCols = (
        $img->colorAllocate(165, 255, 138),
        $img->colorAllocate(237, 239, 133),
        $img->colorAllocate(255, 200, 72),
        $img->colorAllocate(255, 172, 98),
        $img->colorAllocate(255, 151, 151),
        $img->colorAllocate(254, 152, 241),
        $img->colorAllocate(170, 170, 255),
        $img->colorAllocate(165, 254, 250),
    );
    $img->setThickness(2);

    # Phrase indices in source order.  Sorting on srcStart gives the same
    # order as chaining srcEnd + 1 -> srcStart when the phrases tile the
    # sentence, and still terminates when the coverage has a gap.
    my @srcPhraseIndices =
        sort { $phrases->[$a]->{'srcStart'} <=> $phrases->[$b]->{'srcStart'} } 0 .. $#$phrases;

    # Background colour per phrase, cycling through the palette in source order.
    my @srcBGCols;
    foreach my $rank (0 .. $#srcPhraseIndices) {
        $srcBGCols[$srcPhraseIndices[$rank]] = $rank % scalar(@bgCols);
    }

    # A phrase is "dropped" when it has no target text or was translated as
    # the literal <EPSILON> token.  (tgtText is an array reference, so testing
    # its length() can never tell the two cases apart.)
    my @isDropped = map {
        my $tgtText = $_->{'tgtText'};
        (!@$tgtText || join('|', @{$tgtText->[0]}) eq '<EPSILON>') ? 1 : 0;
    } @$phrases;

    # Left edge of every source box: phrases are placed left to right in
    # source order.  The array is indexed by phrase index.
    my @srcStartX;
    my $srcX = $leftMargin;
    foreach my $k (@srcPhraseIndices) {
        $srcStartX[$k] = $srcX;
        $srcX += $font->width * $phrases->[$k]->{'srcNumChars'} + 2 * $phraseEdgeHSpace;
    }

    # Left edge of every target box: phrases are placed left to right in the
    # order they were read; dropped phrases take no room.
    my @tgtStartX;
    my $tgtX = $leftMargin;
    foreach my $k (0 .. $#$phrases) {
        next if $isDropped[$k];
        $tgtStartX[$k] = $tgtX;
        $tgtX += $font->width * $phrases->[$k]->{'tgtNumChars'} + 2 * $phraseEdgeHSpace;
    }

    # Paint the whole canvas in the (transparent) background colour.
    $img->filledRectangle(0, 0, $img->width, $img->height, $white);

    my $srcBottomY = $srcY + ($font->height + $phraseEdgeVSpace) * $srcNumFactors;
    foreach my $k (@srcPhraseIndices) {
        my $phrase = $phrases->[$k];
        my $colour = $bgCols[$srcBGCols[$k]];
        my $srcTextWidth = $font->width * $phrase->{'srcNumChars'};

        $img->filledRectangle(
            $srcStartX[$k], $srcY - $phraseEdgeVSpace,
            $srcStartX[$k] + $srcTextWidth + 2 * $phraseEdgeHSpace, $srcBottomY,
            $colour);

        if ($isDropped[$k]) {
            # No translation: flag the source words in the highlight colour.
            writeFactoredStringGD($img, $srcStartX[$k] + $phraseEdgeHSpace, \@srcFactorYs,
                $phrase->{'srcText'}, $font, $highlightCol);
            next;
        }

        my $tgtTextWidth = $font->width * $phrase->{'tgtNumChars'};
        $img->filledRectangle(
            $tgtStartX[$k], $tgtY - $phraseEdgeVSpace,
            $tgtStartX[$k] + $tgtTextWidth + 2 * $phraseEdgeHSpace,
            $tgtY + ($font->height + $phraseEdgeVSpace) * $tgtNumFactors,
            $colour);

        # Connector from the middle of the source box to the middle of the
        # target box.
        my $srcMidX = $srcStartX[$k] + $srcTextWidth / 2 + $phraseEdgeHSpace;
        my $tgtMidX = $tgtStartX[$k] + $tgtTextWidth / 2 + $phraseEdgeHSpace;
        $img->line($srcMidX, $srcBottomY, $tgtMidX, $tgtY, $colour);

        writeFactoredStringGD($img, $srcStartX[$k] + $phraseEdgeHSpace, \@srcFactorYs,
            $phrase->{'srcText'}, $font, $black);
        writeFactoredStringGD($img, $tgtStartX[$k] + $phraseEdgeHSpace, \@tgtFactorYs,
            $phrase->{'tgtText'}, $font, $black);
    }

    # Write the image; PNG data is binary, and buffered write errors only
    # surface at close.
    my $imgfilename = "$imgdir/$i.png";
    open(my $imageFh, '>', $imgfilename) or die "couldn't create tmp image '$imgfilename': $!\n";
    binmode($imageFh);
    print {$imageFh} $img->png();
    close($imageFh) or die "couldn't finish writing tmp image '$imgfilename': $!\n";
}
|
|
|
|
|
# Emit the HTML report on standard output: the overall average phrase length,
# then one section per sentence holding that sentence's average and its image.
my $stylesheet = <<EOHTML;
<style type="text/css">
div.sentence {}
</style>
EOHTML

print "<html><head><title>Translation Options Used</title>$stylesheet</head><body>\n";
print "<span style=\"font-size:large\"><b>Overall Average Phrase Length:</b> ",
    sprintf("%.3lf", $totalPhraseLength / $totalNumPhrases),
    "</span><p>\n";

foreach my $sentenceIndex (0 .. $#{$sentenceData[0]}) {
    # A rule separates every sentence from the one before it.
    print "<hr width=98%>" unless $sentenceIndex == 0;

    my $average = sprintf("%.3lf", $sentenceData[2]->[$sentenceIndex]);
    print "<div class=\"sentence\"><b>Average Phrase Length:</b> ",
        $average,
        "<p><img src=\"$imgdir/$sentenceIndex.png\"></div>\n";
}

print "</body></html>";
|
|
|
|
|
|
|
|
|
# Return the larger of two numbers; when neither is greater, the second
# argument is returned.
sub max {
    my ($first, $second) = @_;
    return $first if $first > $second;
    return $second;
}
|
|
|
|
|
# Return the largest value of a non-empty list of numbers.
# Dies with "maxN(): empty array!" when called without arguments.
sub maxN {
    die "maxN(): empty array!\n" unless @_;
    my ($largest, @others) = @_;
    foreach my $candidate (@others) {
        $largest = $candidate if $candidate > $largest;
    }
    return $largest;
}
|
|
|
|
|
# Write a phrase onto an image, one text row per factor.
#
#   $img     object whose string() method draws the text
#   $startX  x coordinate at which the first word starts
#   $ys      array ref: y coordinate of each factor row
#   $factors array ref of words, each word an array ref of its factors
#   $font    font object; its width() is the advance per character
#   $color   colour passed through to string()
#
# Factor number n of a word is written on row n.  After each word the x
# position advances by the width of that word's longest factor plus one
# character, so the factors of the next word line up in a column.  A word
# that has fewer factors than there are rows leaves the extra rows empty
# rather than handing an undefined string to string().
sub writeFactoredStringGD {
    my ($img, $startX, $ys, $factors, $font, $color) = @_;

    foreach my $word (@$factors) {
        foreach my $row (0 .. $#$ys) {
            next unless defined $word->[$row];
            $img->string($font, $startX, $ys->[$row], $word->[$row], $color);
        }
        $startX += $font->width * (maxN(map { length($_) } @$word) + 1);
    }
    return;
}
|
|