|
#include <sstream> |
|
#include "Manager.h" |
|
#include "PVertex.h" |
|
#include "moses/OutputCollector.h" |
|
#include "moses/Util.h" |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
|
|
/// Constructs a Syntax::Manager by handing `ttask` straight to the
/// Moses::BaseManager constructor. No other initialisation happens here.
Manager::Manager(ttasksptr const& ttask) : Moses::BaseManager(ttask) {}
|
|
|
/// Writes one line for the best derivation of the current input to
/// `collector`, keyed by the source's translation id.
///
/// The line is: an optional score prefix (when output.ReportHypoScore is set),
/// followed by the target yield with its first and last word removed, followed
/// by a newline. When there is no best hyperedge the score prefix is "0 " and
/// the yield is empty.
///
/// @param collector  Destination for the output; if null the call is a no-op.
/// @throws whatever UTIL_THROW_IF2 raises when the yield has fewer than two
///         words.
void Manager::OutputBest(OutputCollector *collector) const
{
  // Nothing to write to.
  if (collector == NULL) {
    return;
  }

  std::ostringstream out;
  FixPrecision(out);

  const SHyperedge *const best = GetBestSHyperedge();
  if (best != NULL) {
    if (options()->output.ReportHypoScore) {
      out << best->label.futureScore << " ";
    }
    Phrase yield = GetOneBestTargetYield(*best);
    // The yield is expected to be wrapped in a beginning-of-sentence and an
    // end-of-sentence word; both are stripped before printing.
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(options()->output.factor_order);
  } else {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (options()->output.ReportHypoScore) {
      out << "0 ";
    }
  }

  // Both cases terminate the record with a single newline.
  out << '\n';
  collector->Write(m_source.GetTranslationId(), out.str());
}
|
|
|
/// Extracts the k-best derivations for the current input and writes them to
/// `collector` via OutputNBestList().
///
/// The list size and the distinctness flag are taken from
/// options()->nbest.nbest_size and options()->nbest.only_distinct.
///
/// @param collector  Destination for the n-best list; if null the call is a
///                   no-op and no extraction is performed.
void Manager::OutputNBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  const long id = m_source.GetTranslationId();

  KBestExtractor::KBestVec kBest;
  ExtractKBest(options()->nbest.nbest_size, kBest,
               options()->nbest.only_distinct);

  OutputNBestList(collector, kBest, id);
}
|
|
|
/// Writes every word stored in m_oovs, in the set's iteration order and with
/// no separator added here, followed by a newline, to `collector` under the
/// source's translation id.
///
/// @param collector  Destination for the output; if null the call is a no-op.
void Manager::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  typedef boost::unordered_set<Moses::Word>::const_iterator OovIter;

  const long id = m_source.GetTranslationId();

  std::ostringstream buffer;
  OovIter it = m_oovs.begin();
  while (it != m_oovs.end()) {
    buffer << *it;
    ++it;
  }
  buffer << std::endl;

  collector->Write(id, buffer.str());
}
|
|
|
/// Formats an n-best list and writes it to `collector` in a single Write().
///
/// Each derivation produces one line of " ||| "-separated fields:
///   translationId ||| surface ||| feature scores ||| total score
///   [ ||| word alignments ] [ ||| tree ]
/// The alignment field is emitted when options()->nbest.include_alignment_info
/// is set, the tree field when options()->nbest.print_trees is set.
///
/// @param collector      Destination; must be non-null (asserted).
/// @param nBestList      Derivations to print, in the order given.
/// @param translationId  Id printed at the start of every line and used as
///                       the key for collector->Write().
/// @throws whatever UTIL_THROW_IF2 raises when an output phrase has fewer
///         than two words.
void Manager::OutputNBestList(OutputCollector *collector,
                              const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  // Check the precondition before the first dereference below; the original
  // asserted only after collector had already been used.
  assert(collector);

  std::ostringstream out;

  // Precision is fixed only when the collector reports that it writes to cout.
  if (collector->OutputIsCout()) {
    FixPrecision(out);
  }

  // Options are loop-invariant, so read them once.
  const bool includeWordAlignment = options()->nbest.include_alignment_info;
  const bool PrintNBestTrees = options()->nbest.print_trees;
  const bool with_labels = options()->nbest.include_feature_labels;

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // Get the derivation's target-side yield.
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // Strip the beginning-of-sentence and end-of-sentence words.
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // Mandatory fields: id, surface string, feature scores, total score.
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase);
    out << " ||| ";
    derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
    out << " ||| " << derivation.score;

    // Optional field: word alignments as "source-target " pairs (each pair,
    // including the last, is followed by a space).
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // Optional field: the derivation's output tree.
    if (PrintNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // '\n' rather than std::endl: flushing a string stream per entry does
    // nothing useful and the bytes written are the same.
    out << '\n';
  }

  collector->Write(translationId, out.str());
}
|
|
|
/// Recursively collects the terminal word-alignment points of `derivation`
/// (and of all its subderivations) into `retAlign`.
///
/// @param retAlign     Receives (source position, target position) pairs.
///                     Inserting a pair that is already present is treated as
///                     an error.
/// @param derivation   The derivation whose rule alignments are to be output.
/// @param startTarget  Target-side offset at which this derivation's yield
///                     begins.
/// @return The number of target words covered by this derivation: one per
///         terminal in the rule's target phrase plus the value returned for
///         each non-terminal's subderivation.
/// @throws whatever UTIL_THROW_IF2 raises when the non-terminal alignment
///         map disagrees with the number of subderivations, when a
///         non-terminal target position is outside the non-terminal index
///         map, or when an alignment point is produced twice.
std::size_t Manager::OutputAlignmentNBest(
  Alignments &retAlign,
  const KBestExtractor::Derivation &derivation,
  std::size_t startTarget) const
{
  const SHyperedge &shyperedge = derivation.edge->shyperedge;

  std::size_t totalTargetSize = 0;
  std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();

  const TargetPhrase &tp = *(shyperedge.label.translation);

  std::size_t thisSourceSize = CalcSourceSize(derivation);

  // One slot per source-side / target-side rule position. The slots for
  // non-terminals are filled with the size of the corresponding
  // subderivation below; all other slots stay 0 until ShiftOffsets() runs.
  std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
  std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm =
    shyperedge.label.translation->GetAlignNonTerm();
  std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
    aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      std::size_t sourceInd = targetPos2SourceInd[targetPos];
      std::size_t sourcePos = sourceInd2pos[sourceInd];

      const KBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // Record how many source words the non-terminal covers.
      std::size_t sourceSize =
        subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // Recurse into the subderivation, which starts where the target words
      // counted so far end, and record how many target words it produced.
      std::size_t currStartTarget = startTarget + totalTargetSize;
      std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                               currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // A terminal contributes exactly one target word.
      ++totalTargetSize;
    }
  }

  // NOTE(review): ShiftOffsets is defined elsewhere; presumably it turns the
  // per-position sizes into absolute positions starting at the given offset,
  // since the results are used as absolute positions below. Confirm against
  // its definition.
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // Map this rule's terminal alignments from rule-relative positions to the
  // positions stored in the offset vectors and add them to the result.
  const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();

  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<std::size_t, std::size_t> &align = *iter;
    std::size_t relSource = align.first;
    std::size_t relTarget = align.second;
    std::size_t absSource = sourceOffsets[relSource];
    std::size_t absTarget = targetOffsets[relTarget];

    std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
    std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
|
|
|
/// Computes the source-side size of the rule applied by derivation `d`.
///
/// Starts from the number of words covered by the hyperedge head's span and,
/// for every tail vertex, subtracts (words covered by that vertex - 1), so
/// that each tail vertex counts as a single position in the result.
///
/// @param d  Derivation whose hyperedge is measured.
/// @return   The head span's word count with each tail span collapsed to one.
std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
{
  const SHyperedge &shyperedge = d.edge->shyperedge;
  std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
  for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
    std::size_t childSize =
      shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
    ret -= (childSize - 1);
  }
  return ret;
}
|
|
|
} |
|
} |
|
|