File size: 1,844 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
#pragma once
#include <stack>
#include <vector>
#include "util/string_piece.hh"
#include "moses/FactorCollection.h"
#include "moses/TypeDef.h"
#include "HyperPath.h"
#include "TreeFragmentTokenizer.h"
namespace Moses
{
namespace Syntax
{
namespace F2S
{
// Parses a string representation of a tree fragment, adding the terminals
// and non-terminals to FactorCollection::Instance() and building a
// HyperPath object.
//
// This class is designed to be used during rule table loading. Since every
// rule has a tree fragment on the source-side, Load() may be called millions
// of times. The algorithm therefore sacrifices readability for speed and
// shoehorns everything into two passes over the input token sequence.
//
class HyperPathLoader
{
public:
  // Parse the string form of a tree fragment (first argument) and build the
  // corresponding HyperPath in the object passed as the second argument.
  void Load(const StringPiece &, HyperPath &);

private:
  // Record describing a single node of the tree fragment.
  struct NodeTuple {
    int index;           // Position of this node in preorder.
    int parent;          // Preorder position of this node's parent.
    int depth;           // Depth at which this node sits.
    std::size_t symbol;  // Factor ID when the node is a tree terminal or
                         // non-terminal; HyperPath::kEpsilon when the node
                         // is virtual.
  };

  // Compute the height of the tree fragment currently held in m_tokenSeq.
  int DetermineHeight() const;

  // Build the preorder sequence of NodeTuples (virtual nodes included) for
  // the current tree fragment, given its height.
  void GenerateNodeTupleSeq(int height);

  // Add s to FactorCollection::Instance() with the flag argument set to
  // false, and return the Factor pointer handed back by AddFactor().
  const Factor *AddTerminalFactor(const StringPiece &s) {
    const bool isNonTerminal = false;
    return FactorCollection::Instance().AddFactor(s, isNonTerminal);
  }

  // Add s to FactorCollection::Instance() with the flag argument set to
  // true, and return the Factor pointer handed back by AddFactor().
  const Factor *AddNonTerminalFactor(const StringPiece &s) {
    const bool isNonTerminal = true;
    return FactorCollection::Instance().AddFactor(s, isNonTerminal);
  }

  // Token sequence of the tree fragment currently being processed.
  std::vector<TreeFragmentToken> m_tokenSeq;

  // Preorder NodeTuple sequence for the current tree fragment.
  std::vector<NodeTuple> m_nodeTupleSeq;

  // NOTE(review): presumably holds preorder indices of the open ancestor
  // nodes while the token sequence is walked -- confirm against the .cpp.
  std::stack<int> m_parentStack;
};
} // namespace F2S
} // namespace Syntax
} // namespace Moses
|