File size: 1,844 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#pragma once

#include <stack>
#include <vector>

#include "util/string_piece.hh"

#include "moses/FactorCollection.h"
#include "moses/TypeDef.h"

#include "HyperPath.h"
#include "TreeFragmentTokenizer.h"

namespace Moses
{
namespace Syntax
{
namespace F2S
{

// Parses a string representation of a tree fragment, adding the terminals
// and non-terminals to FactorCollection::Instance() and building a
// HyperPath object.
//
// This class is designed to be used during rule table loading.  Since every
// rule has a tree fragment on the source-side, Load() may be called millions
// of times.  The algorithm therefore sacrifices readability for speed and
// shoehorns everything into two passes over the input token sequence.
//
// NOTE(review): the working buffers below are data members rather than locals
// of Load(); presumably this lets their storage be reused across calls --
// confirm against the implementation file.  If so, a single instance should
// not be shared between threads without external synchronisation.
//
class HyperPathLoader
{
public:
  // Entry point.  The first argument is the string form of the tree fragment
  // and the second is the HyperPath object that gets built from it (the
  // parameters are unnamed in this declaration; roles are taken from the
  // class comment above).
  // NOTE(review): whether the HyperPath is cleared first or appended to is
  // not visible from this header -- check the definition.
  void Load(const StringPiece &, HyperPath &);

private:
  // One record per node of the tree fragment, in preorder (see
  // GenerateNodeTupleSeq).
  // NOTE(review): the fields have no default initializers, so a
  // default-constructed NodeTuple holds indeterminate values until every
  // field has been assigned.
  struct NodeTuple {
    int index;          // Preorder index of the node.
    int parent;         // Preorder index of the node's parent.
    int depth;          // Depth of the node.
    // NOTE(review): std::size_t is used here but <cstddef> is not included
    // directly by this header; it presumably arrives via <vector>/<stack>.
    std::size_t symbol; // Either the factor ID of a tree terminal/non-terminal
    // or for virtual nodes, HyperPath::kEpsilon.
  };

  // Determine the height of the current tree fragment (stored in m_tokenSeq).
  // Const: reads the token sequence without modifying the loader's state.
  int DetermineHeight() const;

  // Generate the preorder sequence of NodeTuples for the current tree fragment,
  // including virtual nodes.
  // NOTE(review): 'height' is presumably the value returned by
  // DetermineHeight() for the same fragment, and the result presumably lands
  // in m_nodeTupleSeq -- confirm in the implementation file.
  void GenerateNodeTupleSeq(int height);

  // Registers 's' with the global FactorCollection, passing 'false' as the
  // second AddFactor argument (presumably the "is non-terminal" flag -- verify
  // against FactorCollection::AddFactor), and returns the resulting factor.
  const Factor *AddTerminalFactor(const StringPiece &s) {
    return FactorCollection::Instance().AddFactor(s, false);
  }

  // Same as AddTerminalFactor, but passes 'true' as the second AddFactor
  // argument.
  const Factor *AddNonTerminalFactor(const StringPiece &s) {
    return FactorCollection::Instance().AddFactor(s, true);
  }

  // Token sequence of the tree fragment currently being loaded.
  std::vector<TreeFragmentToken> m_tokenSeq;
  // Preorder NodeTuple sequence for the current fragment (presumably the
  // output of GenerateNodeTupleSeq -- confirm).
  std::vector<NodeTuple> m_nodeTupleSeq;
  // Scratch stack of ints; presumably the preorder indices of the currently
  // open ancestor nodes while the token sequence is scanned -- confirm.
  std::stack<int> m_parentStack;
};

}  // namespace F2S
}  // namespace Syntax
}  // namespace Moses