File size: 5,379 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SyntaxNodeCollection.h"
#include <cassert>
#include <iostream>
namespace MosesTraining
{
// Destructor: hands all cleanup to Clear(), which deletes the nodes that
// AddNode() allocated.
SyntaxNodeCollection::~SyntaxNodeCollection()
{
  this->Clear();
}
void SyntaxNodeCollection::Clear()
{
// loop through all m_nodes, delete them
for(size_t i=0; i<m_nodes.size(); i++) {
delete m_nodes[i];
}
m_nodes.clear();
m_index.clear();
}
// Allocates a new SyntaxNode for the span [startPos, endPos] with the given
// label, registers it in the node list and in every index, and returns it.
// The returned pointer is also stored in m_nodes, which Clear() deletes.
SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
    const std::string &label)
{
  SyntaxNode* const node = new SyntaxNode(label, startPos, endPos);

  m_nodes.push_back( node );
  m_index[ startPos ][ endPos ].push_back( node );
  m_endPositionsIndex[ endPos ].push_back( node );
  // TODO: the start-position index may be redundant: the same nodes can be
  // reached through m_index[startPos] by iterating over its InnerNodeIndex
  // (i.e. over the end positions).
  m_startPositionsIndex[ startPos ].push_back( node );

  // Keep m_numWords at least one past the largest end position seen.
  const int wordsCovered = endPos + 1;
  if (m_numWords < wordsCovered) {
    m_numWords = wordsCovered;
  }
  return node;
}
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes(
int startPos, int endPos ) const
{
NodeIndex::const_iterator startIndex = m_index.find( startPos );
if (startIndex == m_index.end() )
return m_emptyNode;
InnerNodeIndex::const_iterator endIndex = startIndex->second.find( endPos );
if (endIndex == startIndex->second.end())
return m_emptyNode;
return endIndex->second;
}
bool SyntaxNodeCollection::HasNodeStartingAtPosition( int startPos ) const
{
return GetNodesByStartPosition(startPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByStartPosition(
int startPos ) const
{
InnerNodeIndex::const_iterator startIndex = m_startPositionsIndex.find( startPos );
if (startIndex == m_startPositionsIndex.end() )
return m_emptyNode;
return startIndex->second;
}
bool SyntaxNodeCollection::HasNodeEndingAtPosition( int endPos ) const
{
return GetNodesByEndPosition(endPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodesByEndPosition(
int endPos ) const
{
InnerNodeIndex::const_iterator endIndex = m_endPositionsIndex.find( endPos );
if (endIndex == m_endPositionsIndex.end() )
return m_emptyNode;
return endIndex->second;
}
// Builds a SyntaxTree mirroring the nodes held in this collection.
//
// One SyntaxTree is allocated per SyntaxNode, then the trees are linked via
// parent()/children() by walking the span index in order of ascending start
// position, descending end position. The first node visited becomes the
// root, which is returned wrapped in an auto_ptr. If the collection holds no
// nodes the returned auto_ptr is null.
//
// The linking logic relies on the nodes forming a single well-nested tree;
// inputs that violate this trip the asserts below.
// NOTE(review): only the root is handed to the auto_ptr -- presumably
// SyntaxTree's destructor releases its children; confirm.
std::auto_ptr<SyntaxTree> SyntaxNodeCollection::ExtractTree()
{
  std::map<SyntaxNode *, SyntaxTree *> nodeToTree;

  // Create a SyntaxTree object for each SyntaxNode.
  for (std::vector<SyntaxNode*>::const_iterator p = m_nodes.begin();
       p != m_nodes.end(); ++p) {
    nodeToTree[*p] = new SyntaxTree(**p);
  }

  // Connect the SyntaxTrees.

  typedef NodeIndex::const_iterator OuterIterator;
  typedef InnerNodeIndex::const_reverse_iterator InnerIterator;

  SyntaxTree *root = 0;
  SyntaxNode *prevNode = 0;
  SyntaxTree *prevTree = 0;
  // Iterate over all start indices from lowest to highest.
  for (OuterIterator p = m_index.begin(); p != m_index.end(); ++p) {
    const InnerNodeIndex &inner = p->second;
    // Iterate over all end indices from highest to lowest.
    for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) {
      const std::vector<SyntaxNode*> &nodes = q->second;
      // Iterate over all nodes that cover the same span in order of tree
      // depth, top-most first.
      for (std::vector<SyntaxNode*>::const_reverse_iterator r = nodes.rbegin();
           r != nodes.rend(); ++r) {
        SyntaxNode *node = *r;
        SyntaxTree *tree = nodeToTree[node];
        if (!prevNode) {
          // node is the root.
          root = tree;
          tree->parent() = 0;
        } else if (prevNode->start == node->start) {
          // prevNode is the parent of node.
          assert(prevNode->end >= node->end);
          tree->parent() = prevTree;
          prevTree->children().push_back(tree);
        } else {
          // prevNode is a descendant of node's parent.  The lowest common
          // ancestor of prevNode and node will be node's parent.
          SyntaxTree *ancestor = prevTree->parent();
          // Climb until an ancestor reaches at least as far right as node.
          // The pointer is tested before each dereference so that running
          // off the top of the parent chain is caught by the assert below
          // instead of dereferencing a null pointer inside the loop
          // condition.
          while (ancestor && ancestor->value().end < tree->value().end) {
            ancestor = ancestor->parent();
          }
          assert(ancestor);
          tree->parent() = ancestor;
          ancestor->children().push_back(tree);
        }
        prevNode = node;
        prevTree = tree;
      }
    }
  }

  return std::auto_ptr<SyntaxTree>(root);
}
} // namespace MosesTraining
|