|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "SyntaxNodeCollection.h" |
|
|
|
#include <cassert> |
|
#include <iostream> |
|
|
|
namespace MosesTraining |
|
{ |
|
|
|
// Destructor: hands all cleanup to Clear(), which deletes the SyntaxNode
// objects stored in this collection.
SyntaxNodeCollection::~SyntaxNodeCollection()
{
  this->Clear();
}
|
|
|
void SyntaxNodeCollection::Clear() |
|
{ |
|
|
|
for(size_t i=0; i<m_nodes.size(); i++) { |
|
delete m_nodes[i]; |
|
} |
|
m_nodes.clear(); |
|
m_index.clear(); |
|
} |
|
|
|
// Allocates a new SyntaxNode for the span [startPos, endPos] with the given
// label, registers it in the node list and in every lookup index, and returns
// a pointer to it.  The collection keeps the pointer and deletes the node in
// Clear().
SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
    const std::string &label)
{
  SyntaxNode *const created = new SyntaxNode(label, startPos, endPos);

  // Record the node in the master list and in the (start, end), end-only and
  // start-only indexes.
  m_nodes.push_back(created);
  m_index[startPos][endPos].push_back(created);
  m_endPositionsIndex[endPos].push_back(created);
  m_startPositionsIndex[startPos].push_back(created);

  // m_numWords only ever grows: raise it to endPos + 1 when that is larger.
  const int spanLimit = endPos + 1;
  if (m_numWords < spanLimit) {
    m_numWords = spanLimit;
  }

  return created;
}
|
|
|
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const |
|
{ |
|
return GetNodes( startPos, endPos).size() > 0; |
|
} |
|
|
|
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( |
|
int startPos, int endPos ) const |
|
{ |
|
NodeIndex::const_iterator startIndex = m_index.find( startPos ); |
|
if (startIndex == m_index.end() ) |
|
return m_emptyNode; |
|
|
|
InnerNodeIndex::const_iterator endIndex = startIndex->second.find( endPos ); |
|
if (endIndex == startIndex->second.end()) |
|
return m_emptyNode; |
|
|
|
return endIndex->second; |
|
} |
|
|
|
bool SyntaxNodeCollection::HasNodeStartingAtPosition( int startPos ) const |
|
{ |
|
return GetNodesByStartPosition(startPos).size() > 0; |
|
} |
|
|
|
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByStartPosition( |
|
int startPos ) const |
|
{ |
|
InnerNodeIndex::const_iterator startIndex = m_startPositionsIndex.find( startPos ); |
|
if (startIndex == m_startPositionsIndex.end() ) |
|
return m_emptyNode; |
|
|
|
return startIndex->second; |
|
} |
|
|
|
bool SyntaxNodeCollection::HasNodeEndingAtPosition( int endPos ) const |
|
{ |
|
return GetNodesByEndPosition(endPos).size() > 0; |
|
} |
|
|
|
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByEndPosition( |
|
int endPos ) const |
|
{ |
|
InnerNodeIndex::const_iterator endIndex = m_endPositionsIndex.find( endPos ); |
|
if (endIndex == m_endPositionsIndex.end() ) |
|
return m_emptyNode; |
|
|
|
return endIndex->second; |
|
} |
|
|
|
std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree() |
|
{ |
|
std::map<SyntaxNode *, SyntaxTree *> nodeToTree; |
|
|
|
|
|
for (std::vector<SyntaxNode*>::const_iterator p = m_nodes.begin(); |
|
p != m_nodes.end(); ++p) { |
|
nodeToTree[*p] = new SyntaxTree(**p); |
|
} |
|
|
|
|
|
typedef NodeIndex::const_iterator OuterIterator; |
|
typedef InnerNodeIndex::const_reverse_iterator InnerIterator; |
|
|
|
SyntaxTree *root = 0; |
|
SyntaxNode *prevNode = 0; |
|
SyntaxTree *prevTree = 0; |
|
|
|
for (OuterIterator p = m_index.begin(); p != m_index.end(); ++p) { |
|
const InnerNodeIndex &inner = p->second; |
|
|
|
for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) { |
|
const std::vector<SyntaxNode*> &nodes = q->second; |
|
|
|
|
|
for (std::vector<SyntaxNode*>::const_reverse_iterator r = nodes.rbegin(); |
|
r != nodes.rend(); ++r) { |
|
SyntaxNode *node = *r; |
|
SyntaxTree *tree = nodeToTree[node]; |
|
if (!prevNode) { |
|
|
|
root = tree; |
|
tree->parent() = 0; |
|
} else if (prevNode->start == node->start) { |
|
|
|
assert(prevNode->end >= node->end); |
|
tree->parent() = prevTree; |
|
prevTree->children().push_back(tree); |
|
} else { |
|
|
|
|
|
SyntaxTree *ancestor = prevTree->parent(); |
|
while (ancestor->value().end < tree->value().end) { |
|
ancestor = ancestor->parent(); |
|
} |
|
assert(ancestor); |
|
tree->parent() = ancestor; |
|
ancestor->children().push_back(tree); |
|
} |
|
prevNode = node; |
|
prevTree = tree; |
|
} |
|
} |
|
} |
|
|
|
return std::auto_ptr<SyntaxTree>(root); |
|
} |
|
|
|
} |
|
|