File size: 7,528 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
#include <sstream>
#include "Manager.h"
#include "PVertex.h"
#include "moses/OutputCollector.h"
#include "moses/Util.h"
namespace Moses
{
namespace Syntax
{
// Construct a syntax manager for one translation task.  The task handle is
// passed straight through to the Moses::BaseManager constructor; this class
// performs no additional initialisation here.
Manager::Manager(ttasksptr const& ttask)
  : Moses::BaseManager(ttask)
{
}
// Write the best translation of the current sentence to `collector`.
//
// Does nothing when `collector` is null.  Otherwise exactly one line is
// written under the source's translation id:
//   - if a best hyperedge exists: the optional score (when ReportHypoScore is
//     set) followed by the target yield with its first and last words removed;
//   - if there is none: the optional "0 " placeholder score and an empty line.
//
// Throws (via UTIL_THROW_IF2) if the best yield has fewer than two words.
void Manager::OutputBest(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  std::ostringstream line;
  FixPrecision(line);

  const SHyperedge *bestEdge = GetBestSHyperedge();
  if (bestEdge != NULL) {
    if (options()->output.ReportHypoScore) {
      line << bestEdge->label.futureScore << " ";
    }
    Phrase yield = GetOneBestTargetYield(*bestEdge);
    // The yield is expected to be wrapped in begin/end-of-sentence words;
    // strip the first and the last word before printing.
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    line << yield.GetStringRep(options()->output.factor_order) << '\n';
  } else {
    // No derivation was found: still emit a line so the output stays aligned
    // with the input sentences.
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (options()->output.ReportHypoScore) {
      line << "0 ";
    }
    line << '\n';
  }

  collector->Write(m_source.GetTranslationId(), line.str());
}
// Extract the n-best list for the current sentence and write it out.
//
// Does nothing when `collector` is null.  Otherwise ExtractKBest is called
// with the configured nbest_size and only_distinct options, and the resulting
// list is formatted and written by OutputNBestList under the source's
// translation id.
void Manager::OutputNBest(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  const long sentenceId = m_source.GetTranslationId();

  KBestExtractor::KBestVec derivations;
  ExtractKBest(options()->nbest.nbest_size, derivations,
               options()->nbest.only_distinct);

  OutputNBestList(collector, derivations, sentenceId);
}
// Write the out-of-vocabulary words stored in m_oovs to `collector`.
//
// Does nothing when `collector` is null.  Otherwise every element of m_oovs
// is streamed, in the set's iteration order, onto a single line that is
// written under the source's translation id.  No separator is inserted here
// beyond whatever Word's stream operator itself emits.
void Manager::OutputUnknowns(OutputCollector *collector) const
{
  if (!collector) {
    return;
  }

  std::ostringstream line;
  boost::unordered_set<Moses::Word>::const_iterator it = m_oovs.begin();
  const boost::unordered_set<Moses::Word>::const_iterator last = m_oovs.end();
  while (it != last) {
    line << *it;
    ++it;
  }
  line << std::endl;

  collector->Write(m_source.GetTranslationId(), line.str());
}
// Format an n-best list and hand it to `collector` in a single write.
//
// One line is produced per derivation in `nBestList`:
//   <translationId> ||| <surface> ||| <feature scores> ||| <score>
// optionally followed by " ||| <src>-<tgt> ..." word alignments (when
// nbest.include_alignment_info is set) and " ||| <tree>" (when
// nbest.print_trees is set).
//
// collector:     destination for the formatted list; must not be null.
// nBestList:     derivations to print, in the order given.
// translationId: id printed at the start of every line and used for the write.
//
// Throws (via UTIL_THROW_IF2) if a derivation's output phrase has fewer than
// two words.
void Manager::OutputNBestList(OutputCollector *collector,
                              const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  // Check the precondition before the first dereference below; asserting
  // after the pointer has already been used could never catch a null.
  assert(collector);

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  // These options do not change while the list is printed, so read them once.
  bool includeWordAlignment = options()->nbest.include_alignment_info;
  bool PrintNBestTrees = options()->nbest.print_trees; // PrintNBestTrees();
  bool with_labels = options()->nbest.include_feature_labels;

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
    out << " ||| ";
    derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (PrintNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // The buffer is an in-memory string stream, so a plain newline is enough;
    // there is nothing to flush.
    out << '\n';
  }

  collector->Write(translationId, out.str());
}
// Recursively collect the word alignment points of a derivation.
//
// Walks the target side of the translation rule attached to the derivation's
// hyperedge.  Every non-terminal is expanded by recursing into the matching
// sub-derivation; every other word counts as one target position.  The rule's
// own terminal alignments are then shifted by the computed offsets and
// inserted into `retAlign`.
//
// retAlign:    output container receiving (source, target) position pairs.
// derivation:  derivation whose alignments are collected.
// startTarget: target position at which this derivation's output begins.
//
// Returns the number of target words produced by `derivation` (terminals of
// this rule plus everything produced by its sub-derivations).
//
// Throws (via UTIL_THROW_IF2) if the non-terminal alignment data is
// inconsistent with the sub-derivations, or if an alignment point is
// inserted twice.
std::size_t Manager::OutputAlignmentNBest(
  Alignments &retAlign,
  const KBestExtractor::Derivation &derivation,
  std::size_t startTarget) const
{
  const SHyperedge &shyperedge = derivation.edge->shyperedge;

  std::size_t totalTargetSize = 0;
  std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();

  const TargetPhrase &tp = *(shyperedge.label.translation);

  std::size_t thisSourceSize = CalcSourceSize(derivation);

  // position of each terminal word in translation rule, irrespective of
  // alignment if non-term, number is undefined
  std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
  std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm =
    shyperedge.label.translation->GetAlignNonTerm();
  std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
    aiNonTerm.GetNonTermIndexMap();

  // There must be exactly one sub-derivation per source-side non-terminal.
  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      std::size_t sourceInd = targetPos2SourceInd[targetPos];
      std::size_t sourcePos = sourceInd2pos[sourceInd];

      const KBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      std::size_t sourceSize =
        subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      std::size_t currStartTarget = startTarget + totalTargetSize;
      std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                               currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // a terminal occupies exactly one target position
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<std::size_t, std::size_t> &align = *iter;
    std::size_t relSource = align.first;
    std::size_t relTarget = align.second;
    std::size_t absSource = sourceOffsets[relSource];
    std::size_t absTarget = targetOffsets[relTarget];

    std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
    std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point must be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
// Compute the source-side length of the rule applied by derivation `d`.
//
// Starts from the number of words covered by the hyperedge head's span and,
// for every tail vertex, replaces the words covered by that vertex's span
// with a single position.  The result is therefore
//   words(head) - sum over tail of (words(child) - 1).
//
// The arithmetic is done on unsigned std::size_t values, so the intermediate
// (childSize - 1) wraps for a zero-width child; modular arithmetic still
// yields the value of the formula above whenever that value is non-negative.
std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
{
  const SHyperedge &shyperedge = d.edge->shyperedge;
  std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
  for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
    std::size_t childSize =
      shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
    ret -= (childSize - 1);
  }
  return ret;
}
} // Syntax
} // Moses
|