|
#include "InputTreeBuilder.h" |
|
|
|
#include "moses/StaticData.h" |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
namespace T2S |
|
{ |
|
|
|
// Constructs a builder and stores a copy of the factor order that is later
// passed to Word::CreateFromString when label words are created for the
// non-terminal nodes.
//
// oFactors - factor types to use for those label words.
InputTreeBuilder::InputTreeBuilder(const std::vector<FactorType> &oFactors)
  : m_outputFactorOrder(oFactors)
{
  // Nothing else to set up.
}
|
|
|
void InputTreeBuilder::Build(const TreeInput &in, |
|
const std::string &topLevelLabel, |
|
InputTree &out) |
|
{ |
|
CreateNodes(in, topLevelLabel, out); |
|
ConnectNodes(out); |
|
} |
|
|
|
|
|
void InputTreeBuilder::CreateNodes(const TreeInput &in, |
|
const std::string &topLevelLabel, |
|
InputTree &out) |
|
{ |
|
|
|
const std::size_t numWords = in.GetSize(); |
|
|
|
|
|
|
|
|
|
std::vector<XMLParseOutput> xmlNodes = in.GetLabelledSpans(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
SortXmlNodesIntoPostOrder(xmlNodes); |
|
|
|
|
|
|
|
std::vector<XMLParseOutput> nonTerms; |
|
nonTerms.reserve(xmlNodes.size()+1); |
|
for (std::vector<XMLParseOutput>::const_iterator p = xmlNodes.begin(); |
|
p != xmlNodes.end(); ++p) { |
|
std::size_t start = p->m_range.GetStartPos(); |
|
std::size_t end = p->m_range.GetEndPos(); |
|
nonTerms.push_back(XMLParseOutput(p->m_label, Range(start+1, end+1))); |
|
} |
|
|
|
nonTerms.push_back(XMLParseOutput(topLevelLabel, Range(0, numWords-1))); |
|
|
|
|
|
|
|
|
|
|
|
out.nodes.reserve(numWords + nonTerms.size()); |
|
out.nodesAtPos.resize(numWords); |
|
|
|
|
|
int prevStart = -1; |
|
int prevEnd = -1; |
|
for (std::vector<XMLParseOutput>::const_iterator p = nonTerms.begin(); |
|
p != nonTerms.end(); ++p) { |
|
int start = static_cast<int>(p->m_range.GetStartPos()); |
|
int end = static_cast<int>(p->m_range.GetEndPos()); |
|
|
|
|
|
if (start != prevStart && end != prevEnd) { |
|
|
|
|
|
for (int i = prevEnd+1; i <= end; ++i) { |
|
PVertex v(Range(i, i), in.GetWord(i)); |
|
out.nodes.push_back(InputTree::Node(v)); |
|
out.nodesAtPos[i].push_back(&out.nodes.back()); |
|
} |
|
} |
|
|
|
Word w(true); |
|
w.CreateFromString(Moses::Output, m_outputFactorOrder, p->m_label, true); |
|
PVertex v(Range(start, end), w); |
|
out.nodes.push_back(InputTree::Node(v)); |
|
out.nodesAtPos[start].push_back(&out.nodes.back()); |
|
|
|
prevStart = start; |
|
prevEnd = end; |
|
} |
|
} |
|
|
|
|
|
// Attaches every node in out.nodes, except the last, to its parent by
// appending a pointer to it to the parent's children list.
//
// The parent of node i is the first node after i whose span contains node
// i's span.  This presumes out.nodes is in post-order, which is how
// CreateNodes fills it.  The last node is treated as the root and is not
// given a parent.
//
// out - tree whose nodes are linked in place.
void InputTreeBuilder::ConnectNodes(InputTree &out)
{
  const std::size_t numNodes = out.nodes.size();

  // The bound is written as i+1 < numNodes rather than i < numNodes-1
  // because the latter wraps around when out.nodes is empty.
  for (std::size_t i = 0; i+1 < numNodes; ++i) {
    InputTree::Node &child = out.nodes[i];
    const std::size_t start = child.pvertex.span.GetStartPos();
    const std::size_t end = child.pvertex.span.GetEndPos();

    // Scan forward for the closest covering node.  The scan is bounded so
    // that a node with no covering successor is left unattached instead of
    // reading past the end of out.nodes.
    for (std::size_t j = i+1; j < numNodes; ++j) {
      InputTree::Node &candidate = out.nodes[j];
      const std::size_t candStart = candidate.pvertex.span.GetStartPos();
      const std::size_t candEnd = candidate.pvertex.span.GetEndPos();
      if (candStart <= start && candEnd >= end) {
        // Children are appended in increasing node index, i.e. left to right.
        candidate.children.push_back(&child);
        break;
      }
    }
  }
}
|
|
|
void InputTreeBuilder::SortXmlNodesIntoPostOrder( |
|
std::vector<XMLParseOutput> &nodes) |
|
{ |
|
|
|
|
|
std::vector<std::pair<XMLParseOutput *, int> > pairs; |
|
pairs.reserve(nodes.size()); |
|
for (std::size_t i = 0; i < nodes.size(); ++i) { |
|
pairs.push_back(std::make_pair(&(nodes[i]), i)); |
|
} |
|
|
|
|
|
std::sort(pairs.begin(), pairs.end(), PostOrderComp); |
|
|
|
|
|
std::vector<XMLParseOutput> tmp; |
|
tmp.reserve(nodes.size()); |
|
for (std::size_t i = 0; i < pairs.size(); ++i) { |
|
tmp.push_back(nodes[pairs[i].second]); |
|
} |
|
nodes.swap(tmp); |
|
} |
|
|
|
|
|
// Ordering predicate used to sort labelled spans into post-order.
//
// Returns true if x must come before y.  The keys, in priority order, are:
//   1. smaller end position first;
//   2. for equal ends, larger start position first (the narrower span
//      precedes the wider span that contains it);
//   3. for identical spans, smaller original index first.
//
// x, y - (span pointer, original index) pairs; the pointers are dereferenced
//        and so must not be null.
bool InputTreeBuilder::PostOrderComp(const std::pair<XMLParseOutput *, int> &x,
                                     const std::pair<XMLParseOutput *, int> &y)
{
  const std::size_t lhsStart = x.first->m_range.GetStartPos();
  const std::size_t lhsEnd = x.first->m_range.GetEndPos();
  const std::size_t rhsStart = y.first->m_range.GetStartPos();
  const std::size_t rhsEnd = y.first->m_range.GetEndPos();

  if (lhsEnd != rhsEnd) {
    return lhsEnd < rhsEnd;
  }
  if (lhsStart != rhsStart) {
    return lhsStart > rhsStart;
  }
  // Identical spans: keep the original relative order.
  return x.second < y.second;
}
|
|
|
} |
|
} |
|
} |
|
|