|
#include "HyperPathLoader.h" |
|
|
|
#include "TreeFragmentTokenizer.h" |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
namespace F2S |
|
{ |
|
|
|
void HyperPathLoader::Load(const StringPiece &s, HyperPath &path) |
|
{ |
|
path.nodeSeqs.clear(); |
|
|
|
m_tokenSeq.clear(); |
|
for (TreeFragmentTokenizer p(s); p != TreeFragmentTokenizer(); ++p) { |
|
m_tokenSeq.push_back(*p); |
|
} |
|
|
|
int height = DetermineHeight(); |
|
|
|
path.nodeSeqs.resize(height+1); |
|
|
|
GenerateNodeTupleSeq(height); |
|
|
|
for (int depth = 0; depth <= height; ++depth) { |
|
int prevParent = -1; |
|
|
|
|
|
for (std::vector<NodeTuple>::const_iterator p = m_nodeTupleSeq.begin(); |
|
p != m_nodeTupleSeq.end(); ++p) { |
|
const NodeTuple &tuple = *p; |
|
if (tuple.depth != depth) { |
|
continue; |
|
} |
|
if (prevParent != -1 && tuple.parent != prevParent) { |
|
path.nodeSeqs[depth].push_back(HyperPath::kComma); |
|
} |
|
path.nodeSeqs[depth].push_back(tuple.symbol); |
|
prevParent = tuple.parent; |
|
} |
|
} |
|
} |
|
|
|
int HyperPathLoader::DetermineHeight() const |
|
{ |
|
int height = 0; |
|
int maxHeight = 0; |
|
std::size_t numTokens = m_tokenSeq.size(); |
|
for (std::size_t i = 0; i < numTokens; ++i) { |
|
if (m_tokenSeq[i].type == TreeFragmentToken_LSB) { |
|
assert(i+2 < numTokens); |
|
|
|
|
|
if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) { |
|
maxHeight = std::max(++height, maxHeight); |
|
} else { |
|
i += 2; |
|
} |
|
} else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) { |
|
--height; |
|
} |
|
} |
|
return maxHeight; |
|
} |
|
|
|
void HyperPathLoader::GenerateNodeTupleSeq(int height) |
|
{ |
|
m_nodeTupleSeq.clear(); |
|
|
|
|
|
assert(m_parentStack.empty()); |
|
m_parentStack.push(-1); |
|
|
|
|
|
|
|
NodeTuple tuple; |
|
tuple.index = -1; |
|
tuple.parent = -1; |
|
tuple.depth = -1; |
|
tuple.symbol = HyperPath::kEpsilon; |
|
|
|
|
|
std::size_t numTokens = m_tokenSeq.size(); |
|
for (std::size_t i = 0; i < numTokens; ++i) { |
|
if (m_tokenSeq[i].type == TreeFragmentToken_LSB) { |
|
assert(i+2 < numTokens); |
|
|
|
|
|
if (m_tokenSeq[i+2].type != TreeFragmentToken_RSB) { |
|
++tuple.index; |
|
tuple.parent = m_parentStack.top(); |
|
m_parentStack.push(tuple.index); |
|
++tuple.depth; |
|
tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId(); |
|
m_nodeTupleSeq.push_back(tuple); |
|
} else { |
|
++tuple.index; |
|
tuple.parent = m_parentStack.top(); |
|
++tuple.depth; |
|
tuple.symbol = AddNonTerminalFactor(m_tokenSeq[++i].value)->GetId(); |
|
m_nodeTupleSeq.push_back(tuple); |
|
|
|
if (tuple.depth < height) { |
|
int origDepth = tuple.depth; |
|
m_parentStack.push(tuple.index); |
|
for (int depth = origDepth+1; depth <= height; ++depth) { |
|
++tuple.index; |
|
tuple.parent = m_parentStack.top(); |
|
m_parentStack.push(tuple.index); |
|
tuple.depth = depth; |
|
tuple.symbol = HyperPath::kEpsilon; |
|
m_nodeTupleSeq.push_back(tuple); |
|
} |
|
for (int depth = origDepth; depth <= height; ++depth) { |
|
m_parentStack.pop(); |
|
} |
|
tuple.depth = origDepth; |
|
} |
|
--tuple.depth; |
|
|
|
++i; |
|
} |
|
} else if (m_tokenSeq[i].type == TreeFragmentToken_WORD) { |
|
|
|
|
|
|
|
++tuple.index; |
|
tuple.parent = m_parentStack.top(); |
|
++tuple.depth; |
|
tuple.symbol = AddTerminalFactor(m_tokenSeq[i].value)->GetId(); |
|
m_nodeTupleSeq.push_back(tuple); |
|
|
|
if (m_tokenSeq[i+1].type == TreeFragmentToken_RSB && |
|
tuple.depth < height) { |
|
int origDepth = tuple.depth; |
|
m_parentStack.push(tuple.index); |
|
for (int depth = origDepth+1; depth <= height; ++depth) { |
|
++tuple.index; |
|
tuple.parent = m_parentStack.top(); |
|
m_parentStack.push(tuple.index); |
|
tuple.depth = depth; |
|
tuple.symbol = HyperPath::kEpsilon; |
|
m_nodeTupleSeq.push_back(tuple); |
|
} |
|
for (int depth = origDepth; depth <= height; ++depth) { |
|
m_parentStack.pop(); |
|
} |
|
tuple.depth = origDepth; |
|
} |
|
--tuple.depth; |
|
} else if (m_tokenSeq[i].type == TreeFragmentToken_RSB) { |
|
m_parentStack.pop(); |
|
--tuple.depth; |
|
} |
|
} |
|
|
|
|
|
m_parentStack.pop(); |
|
} |
|
|
|
} |
|
} |
|
} |
|
|