|
#include "KBestExtractor.h" |
|
|
|
#include "moses/ScoreComponentCollection.h" |
|
#include "moses/StaticData.h" |
|
|
|
#include <boost/scoped_ptr.hpp> |
|
|
|
#include <vector> |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
|
|
|
|
void KBestExtractor::Extract( |
|
const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices, |
|
std::size_t k, KBestVec &kBestList) |
|
{ |
|
kBestList.clear(); |
|
if (topLevelVertices.empty()) { |
|
return; |
|
} |
|
|
|
|
|
|
|
std::vector<boost::shared_ptr<SVertex> >::const_iterator p = |
|
topLevelVertices.begin(); |
|
SVertex &bestTopLevelVertex = **p; |
|
boost::scoped_ptr<SVertex> supremeVertex(new SVertex()); |
|
supremeVertex->pvertex = 0; |
|
supremeVertex->best = new SHyperedge(); |
|
supremeVertex->best->head = supremeVertex.get(); |
|
supremeVertex->best->tail.push_back(&bestTopLevelVertex); |
|
supremeVertex->best->label.futureScore = |
|
bestTopLevelVertex.best->label.futureScore; |
|
supremeVertex->best->label.deltas = bestTopLevelVertex.best->label.deltas; |
|
supremeVertex->best->label.translation = 0; |
|
|
|
|
|
|
|
for (++p; p != topLevelVertices.end(); ++p) { |
|
|
|
UTIL_THROW_IF2((*p)->best->label.futureScore > |
|
bestTopLevelVertex.best->label.futureScore, |
|
"top-level SVertices are not correctly sorted"); |
|
|
|
|
|
SHyperedge *altEdge = new SHyperedge(); |
|
altEdge->head = supremeVertex.get(); |
|
altEdge->tail.push_back((*p).get()); |
|
altEdge->label.futureScore = (*p)->best->label.futureScore; |
|
altEdge->label.deltas = (*p)->best->label.deltas; |
|
altEdge->label.translation = 0; |
|
supremeVertex->recombined.push_back(altEdge); |
|
} |
|
|
|
|
|
boost::shared_ptr<KVertex> targetVertex = FindOrCreateVertex(*supremeVertex); |
|
LazyKthBest(targetVertex, k, k); |
|
|
|
|
|
|
|
kBestList.reserve(targetVertex->kBestList.size()); |
|
for (std::vector<boost::weak_ptr<Derivation> >::const_iterator |
|
q = targetVertex->kBestList.begin(); |
|
q != targetVertex->kBestList.end(); ++q) { |
|
const boost::shared_ptr<Derivation> d(*q); |
|
assert(d); |
|
assert(d->subderivations.size() == 1); |
|
kBestList.push_back(d->subderivations[0]); |
|
} |
|
} |
|
|
|
|
|
Phrase KBestExtractor::GetOutputPhrase(const Derivation &d) |
|
{ |
|
FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor; |
|
|
|
Phrase ret(ARRAY_SIZE_INCR); |
|
|
|
const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation); |
|
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = |
|
phrase.GetAlignNonTerm().GetNonTermIndexMap(); |
|
for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) { |
|
const Word &word = phrase.GetWord(pos); |
|
if (word.IsNonTerminal()) { |
|
std::size_t nonTermInd = nonTermIndexMap[pos]; |
|
const Derivation &subderivation = *d.subderivations[nonTermInd]; |
|
Phrase subPhrase = GetOutputPhrase(subderivation); |
|
ret.Append(subPhrase); |
|
} else { |
|
ret.AddWord(word); |
|
if (placeholderFactor == NOT_FOUND) { |
|
continue; |
|
} |
|
|
|
UTIL_THROW2("placeholders are not currently supported by the S2T decoder"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
} |
|
|
|
return ret; |
|
} |
|
|
|
|
|
TreePointer KBestExtractor::GetOutputTree(const Derivation &d) |
|
{ |
|
const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation); |
|
if (const PhraseProperty *property = phrase.GetProperty("Tree")) { |
|
const std::string *tree = property->GetValueString(); |
|
TreePointer mytree (boost::make_shared<InternalTree>(*tree)); |
|
|
|
|
|
std::vector<TreePointer> previous_trees; |
|
for (size_t pos = 0; pos < phrase.GetSize(); ++pos) { |
|
const Word &word = phrase.GetWord(pos); |
|
if (word.IsNonTerminal()) { |
|
size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos]; |
|
const Derivation &subderivation = *d.subderivations[nonTermInd]; |
|
const TreePointer prev_tree = GetOutputTree(subderivation); |
|
previous_trees.push_back(prev_tree); |
|
} |
|
} |
|
|
|
mytree->Combine(previous_trees); |
|
return mytree; |
|
} else { |
|
UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found"); |
|
} |
|
} |
|
|
|
|
|
|
|
boost::shared_ptr<KBestExtractor::KVertex> |
|
KBestExtractor::FindOrCreateVertex(const SVertex &v) |
|
{ |
|
|
|
assert(v.best); |
|
|
|
VertexMap::value_type element(&v, boost::shared_ptr<KVertex>()); |
|
std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element); |
|
boost::shared_ptr<KVertex> &sp = p.first->second; |
|
if (!p.second) { |
|
return sp; |
|
} |
|
sp.reset(new KVertex(v)); |
|
|
|
boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(*(v.best))); |
|
bestEdge->head = sp; |
|
std::size_t kTailSize = 0; |
|
for (std::size_t i = 0; i < v.best->tail.size(); ++i) { |
|
const SVertex *pred = v.best->tail[i]; |
|
if (pred->best) { |
|
++kTailSize; |
|
} |
|
} |
|
bestEdge->tail.reserve(kTailSize); |
|
for (std::size_t i = 0; i < v.best->tail.size(); ++i) { |
|
const SVertex *pred = v.best->tail[i]; |
|
if (pred->best) { |
|
bestEdge->tail.push_back(FindOrCreateVertex(*pred)); |
|
} |
|
} |
|
boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge)); |
|
#ifndef NDEBUG |
|
std::pair<DerivationSet::iterator, bool> q = |
|
#endif |
|
m_derivations.insert(bestDerivation); |
|
assert(q.second); |
|
sp->kBestList.push_back(bestDerivation); |
|
return sp; |
|
} |
|
|
|
|
|
|
|
void KBestExtractor::GetCandidates(boost::shared_ptr<KVertex> v, std::size_t k) |
|
{ |
|
|
|
|
|
for (std::size_t i = 0; i < v->svertex.recombined.size(); ++i) { |
|
const SHyperedge ­peredge = *(v->svertex.recombined[i]); |
|
boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(shyperedge)); |
|
bestEdge->head = v; |
|
|
|
std::size_t kTailSize = 0; |
|
for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) { |
|
const SVertex *pred = shyperedge.tail[j]; |
|
if (pred->best) { |
|
++kTailSize; |
|
} |
|
} |
|
bestEdge->tail.reserve(kTailSize); |
|
for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) { |
|
const SVertex *pred = shyperedge.tail[j]; |
|
if (pred->best) { |
|
bestEdge->tail.push_back(FindOrCreateVertex(*pred)); |
|
} |
|
} |
|
boost::shared_ptr<Derivation> derivation(new Derivation(bestEdge)); |
|
#ifndef NDEBUG |
|
std::pair<DerivationSet::iterator, bool> q = |
|
#endif |
|
m_derivations.insert(derivation); |
|
assert(q.second); |
|
v->candidates.push(derivation); |
|
} |
|
} |
|
|
|
|
|
void KBestExtractor::LazyKthBest(boost::shared_ptr<KVertex> v, std::size_t k, |
|
std::size_t globalK) |
|
{ |
|
|
|
if (v->visited == false) { |
|
|
|
assert(v->kBestList.size() == 1); |
|
|
|
GetCandidates(v, globalK); |
|
v->visited = true; |
|
} |
|
|
|
|
|
while (v->kBestList.size() < k) { |
|
assert(!v->kBestList.empty()); |
|
|
|
|
|
boost::shared_ptr<Derivation> d(v->kBestList.back()); |
|
LazyNext(*v, *d, globalK); |
|
|
|
if (v->candidates.empty()) { |
|
break; |
|
} |
|
|
|
boost::weak_ptr<Derivation> next = v->candidates.top(); |
|
v->candidates.pop(); |
|
|
|
v->kBestList.push_back(next); |
|
} |
|
} |
|
|
|
|
|
void KBestExtractor::LazyNext(KVertex &v, const Derivation &d, |
|
std::size_t globalK) |
|
{ |
|
for (std::size_t i = 0; i < d.edge->tail.size(); ++i) { |
|
boost::shared_ptr<KVertex> pred = d.edge->tail[i]; |
|
|
|
std::size_t k = d.backPointers[i] + 2; |
|
LazyKthBest(pred, k, globalK); |
|
if (pred->kBestList.size() < k) { |
|
|
|
continue; |
|
} |
|
|
|
boost::shared_ptr<Derivation> next(new Derivation(d, i)); |
|
|
|
std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next); |
|
if (p.second) { |
|
v.candidates.push(next); |
|
} |
|
} |
|
} |
|
|
|
|
|
KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e) |
|
{ |
|
edge = e; |
|
const TargetPhrase *translation = edge->shyperedge.label.translation; |
|
|
|
|
|
if (translation) { |
|
scoreBreakdown = translation->GetScoreBreakdown(); |
|
} |
|
const std::size_t arity = edge->tail.size(); |
|
backPointers.resize(arity, 0); |
|
subderivations.reserve(arity); |
|
for (std::size_t i = 0; i < arity; ++i) { |
|
const KVertex &pred = *(edge->tail[i]); |
|
assert(pred.kBestList.size() >= 1); |
|
boost::shared_ptr<Derivation> sub(pred.kBestList[0]); |
|
subderivations.push_back(sub); |
|
scoreBreakdown.PlusEquals(sub->scoreBreakdown); |
|
} |
|
scoreBreakdown.PlusEquals(edge->shyperedge.label.deltas); |
|
score = scoreBreakdown.GetWeightedScore(); |
|
} |
|
|
|
|
|
KBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i) |
|
{ |
|
edge = d.edge; |
|
backPointers = d.backPointers; |
|
subderivations = d.subderivations; |
|
std::size_t j = ++backPointers[i]; |
|
scoreBreakdown = d.scoreBreakdown; |
|
|
|
scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown); |
|
|
|
boost::shared_ptr<Derivation> newSub(edge->tail[i]->kBestList[j]); |
|
subderivations[i] = newSub; |
|
|
|
scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown); |
|
score = scoreBreakdown.GetWeightedScore(); |
|
} |
|
|
|
} |
|
} |
|
|