File size: 5,764 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#include "PatternApplicationTrie.h"

#include "moses/Syntax/PVertex.h"

namespace Moses
{
namespace Syntax
{
namespace S2T
{

int PatternApplicationTrie::Depth() const
{
  if (m_parent) {
    return m_parent->Depth() + 1;
  }
  return 0;
}

// Returns the terminal node closest to the root on the path from the root to
// this node (this node included), or 0 if there is no terminal node on that
// path.  Calling this on a node without a parent yields 0 (unless a value has
// already been stored in m_highestTerminalNode).  A non-null result is
// remembered in m_highestTerminalNode so later calls return immediately.
const PatternApplicationTrie *
PatternApplicationTrie::GetHighestTerminalNode() const
{
  // Only compute when nothing is cached and this is not the parentless node.
  if (!m_highestTerminalNode && m_parent) {
    const PatternApplicationTrie *found = 0;
    // Nodes directly below the parentless node have nothing above them to
    // consult; all other nodes defer to their parent first.
    if (m_parent->m_parent) {
      found = m_parent->GetHighestTerminalNode();
    }
    // Nothing higher up is a terminal, so this node is the answer if it is a
    // terminal itself.
    if (!found && IsTerminalNode()) {
      found = this;
    }
    m_highestTerminalNode = found;
  }
  return m_highestTerminalNode;
}

// Returns the terminal node furthest from the root on the path from the root
// to this node (this node included), or 0 if there is no terminal node on
// that path.  The search walks upwards from this node and stops at the first
// node that either has a cached answer or is itself a terminal.  Only a
// terminal node records the answer (itself) in its m_lowestTerminalNode;
// non-terminal nodes visited on the way are not updated.
const PatternApplicationTrie *
PatternApplicationTrie::GetLowestTerminalNode() const
{
  const PatternApplicationTrie *node = this;
  for (;;) {
    // A previously stored answer takes precedence over everything else.
    if (node->m_lowestTerminalNode) {
      return node->m_lowestTerminalNode;
    }
    // The parentless node has no meaningful answer.
    if (!node->m_parent) {
      return 0;
    }
    // The first terminal met while walking upwards is the lowest one.
    if (node->IsTerminalNode()) {
      node->m_lowestTerminalNode = node;
      return node;
    }
    // Reached the highest node below the parentless node without finding a
    // terminal.
    if (!node->m_parent->m_parent) {
      return 0;
    }
    node = node->m_parent;
  }
}

// A node corresponds to a rule pattern that has been partially applied to a
// sentence (the terminals have fixed positions, but the spans of gap symbols
// may be unknown).  This function determines the range of possible start
// values for the partially-applied pattern.
void PatternApplicationTrie::DetermineStartRange(int sentenceLength,
    int &minStart,
    int &maxStart) const
{
  // Find the leftmost terminal symbol, if any.
  const PatternApplicationTrie *n = GetHighestTerminalNode();
  if (!n) {
    // The pattern contains only gap symbols.
    minStart = 0;
    maxStart = sentenceLength-Depth();
    return;
  }
  assert(n->m_parent);
  if (!n->m_parent->m_parent) {
    // The pattern begins with a terminal symbol so the start position is
    // fixed.
    minStart = n->m_start;
    maxStart = n->m_start;
  } else {
    // The pattern begins with a gap symbol but it contains at least one
    // terminal symbol.  The maximum start position is the start position of
    // the leftmost terminal minus one position for each leading gap symbol.
    minStart = 0;
    maxStart = n->m_start - (n->Depth()-1);
  }
}

// A node corresponds to a rule pattern that has been partially applied to a
// sentence (the terminals have fixed positions, but the spans of gap symbols
// may be unknown).  This function determines the range of possible end values
// for the partially-applied pattern.
void PatternApplicationTrie::DetermineEndRange(int sentenceLength,
    int &minEnd,
    int &maxEnd) const
{
  // Find the rightmost terminal symbol, if any.
  const PatternApplicationTrie *n = GetLowestTerminalNode();
  if (!n) {
    // The pattern contains only gap symbols.
    minEnd = Depth()-1;
    maxEnd = sentenceLength-1;
    return;
  }
  if (n == this) {
    // The pattern ends with a terminal symbol so the end position is fixed.
    minEnd = m_end;
    maxEnd = m_end;
  } else {
    // The pattern ends with a gap symbol but it contains at least one terminal
    // symbol.  The minimum end position is the end position of the rightmost
    // terminal + one position for each trailing gap symbol.
    minEnd = n->m_end + (Depth()-n->Depth());
    maxEnd = sentenceLength-1;
  }
}

// Recursively grows the trie below this node by matching the children of the
// rule trie node `node` against the sentence.
//
//   node        rule trie node whose terminal and non-terminal children are
//               used to create the children of this node
//   minPos      position constraint for the next symbol; -1 means that any
//               position is accepted
//   sentMap     maps a word to the vertices at which it occurs
//   followsGap  true if the previous symbol was a gap; a terminal may then
//               start at any position after minPos, not only at minPos
//
// Every new child is fully extended before it is appended to m_children.
void PatternApplicationTrie::Extend(const RuleTrieScope3::Node &node,
                                    int minPos, const SentenceMap &sentMap,
                                    bool followsGap)
{
  typedef RuleTrieScope3::Node::TerminalMap TerminalMap;
  typedef std::vector<const PVertex *>::const_iterator VertexIter;

  const bool anyPosition = (minPos == -1);
  const std::size_t minPosUnsigned = static_cast<std::size_t>(minPos);

  // Terminal children: one sub-trie per admissible occurrence in the sentence.
  const TerminalMap &terminals = node.GetTerminalMap();
  for (TerminalMap::const_iterator termIt = terminals.begin();
       termIt != terminals.end(); ++termIt) {
    const Word &symbol = termIt->first;
    SentenceMap::const_iterator occurrences = sentMap.find(symbol);
    if (occurrences == sentMap.end()) {
      // The word does not occur in the sentence.
      continue;
    }
    const RuleTrieScope3::Node &ruleChild = termIt->second;
    for (VertexIter vIt = occurrences->second.begin();
         vIt != occurrences->second.end(); ++vIt) {
      const PVertex *vertex = *vIt;
      const std::size_t start = vertex->span.GetStartPos();
      const std::size_t end = vertex->span.GetEndPos();
      const bool admissible =
        anyPosition ||
        start == minPosUnsigned ||
        (followsGap && start > minPosUnsigned);
      if (!admissible) {
        continue;
      }
      PatternApplicationTrie *terminalTrie =
        new PatternApplicationTrie(start, end, ruleChild, vertex, this);
      // The next symbol must begin right after this terminal.
      terminalTrie->Extend(ruleChild, end+1, sentMap, false);
      m_children.push_back(terminalTrie);
    }
  }

  // Non-terminal (gap) child, if the rule trie node has one.
  const RuleTrieScope3::Node *gapChild = node.GetNonTerminalChild();
  if (gapChild) {
    // A gap following another gap has no known start position.
    const int gapStart = followsGap ? -1 : minPos;
    PatternApplicationTrie *gapTrie =
      new PatternApplicationTrie(gapStart, -1, *gapChild, 0, this);
    // Advance the constraint by one position for the gap itself.
    const int nextMinPos = anyPosition ? 1 : minPos+1;
    gapTrie->Extend(*gapChild, nextMinPos, sentMap, true);
    m_children.push_back(gapTrie);
  }
}

// Fills `key` with the nodes on the path leading to this node, ordered from
// the topmost node that has a parent (index 0) down to this node (last
// index).  The parentless node itself is not included.  `key` is resized to
// Depth() elements.
void PatternApplicationTrie::ReadOffPatternApplicationKey(
  PatternApplicationKey &key) const
{
  const int depth = Depth();
  key.resize(depth);
  // Walk upwards from this node, filling the key from the back.
  int slot = depth;
  for (const PatternApplicationTrie *node = this; node->m_parent != 0;
       node = node->m_parent) {
    key[--slot] = node;
  }
}

}  // namespace S2T
}  // namespace Syntax
}  // namespace Moses