|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
#ifndef EXTRACT_GHKM_NODE_H_ |
|
|
#define EXTRACT_GHKM_NODE_H_ |
|
|
|
|
|
#include <cassert> |
|
|
#include <iterator> |
|
|
#include <string> |
|
|
#include <vector> |
|
|
|
|
|
#include "Span.h" |
|
|
|
|
|
namespace MosesTraining |
|
|
{ |
|
|
namespace Syntax |
|
|
{ |
|
|
namespace GHKM |
|
|
{ |
|
|
|
|
|
class Subgraph; |
|
|
|
|
|
enum NodeType { SOURCE, TARGET, TREE }; |
|
|
|
|
|
class Node |
|
|
{ |
|
|
public: |
|
|
Node(const std::string &label, NodeType type) |
|
|
: m_label(label) |
|
|
, m_type(type) |
|
|
, m_pcfgScore(0.0f) {} |
|
|
|
|
|
~Node(); |
|
|
|
|
|
const std::string &GetLabel() const { |
|
|
return m_label; |
|
|
} |
|
|
NodeType GetType() const { |
|
|
return m_type; |
|
|
} |
|
|
const std::vector<Node*> &GetChildren() const { |
|
|
return m_children; |
|
|
} |
|
|
const std::vector<Node*> &GetParents() const { |
|
|
return m_parents; |
|
|
} |
|
|
float GetPcfgScore() const { |
|
|
return m_pcfgScore; |
|
|
} |
|
|
const Span &GetSpan() const { |
|
|
return m_span; |
|
|
} |
|
|
const Span &GetComplementSpan() const { |
|
|
return m_complementSpan; |
|
|
} |
|
|
const std::vector<const Subgraph*> &GetRules() const { |
|
|
return m_rules; |
|
|
} |
|
|
|
|
|
void SetChildren(const std::vector<Node*> &c) { |
|
|
m_children = c; |
|
|
} |
|
|
void SetParents(const std::vector<Node*> &p) { |
|
|
m_parents = p; |
|
|
} |
|
|
void SetPcfgScore(float s) { |
|
|
m_pcfgScore = s; |
|
|
} |
|
|
void SetSpan(const Span &s) { |
|
|
m_span = s; |
|
|
} |
|
|
void SetComplementSpan(const Span &cs) { |
|
|
m_complementSpan = cs; |
|
|
} |
|
|
|
|
|
void AddChild(Node *c) { |
|
|
m_children.push_back(c); |
|
|
} |
|
|
void AddParent(Node *p) { |
|
|
m_parents.push_back(p); |
|
|
} |
|
|
void AddRule(const Subgraph *s) { |
|
|
m_rules.push_back(s); |
|
|
} |
|
|
|
|
|
bool IsSink() const { |
|
|
return m_children.empty(); |
|
|
} |
|
|
bool IsPreterminal() const; |
|
|
|
|
|
void PropagateIndex(int); |
|
|
|
|
|
std::vector<std::string> GetTargetWords() const; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<typename OutputIterator> |
|
|
void GetTreeAncestors(OutputIterator result, bool includeSelf=false); |
|
|
|
|
|
|
|
|
|
|
|
template<typename InputIterator> |
|
|
static Node *LowestCommonAncestor(InputIterator first, InputIterator last); |
|
|
|
|
|
private: |
|
|
|
|
|
Node(const Node &); |
|
|
Node &operator=(const Node &); |
|
|
|
|
|
void GetTargetWords(std::vector<std::string> &) const; |
|
|
|
|
|
std::string m_label; |
|
|
NodeType m_type; |
|
|
std::vector<Node*> m_children; |
|
|
std::vector<Node*> m_parents; |
|
|
float m_pcfgScore; |
|
|
Span m_span; |
|
|
Span m_complementSpan; |
|
|
std::vector<const Subgraph*> m_rules; |
|
|
}; |
|
|
|
|
|
template<typename OutputIterator> |
|
|
void Node::GetTreeAncestors(OutputIterator result, bool includeSelf) |
|
|
{ |
|
|
|
|
|
assert(m_type == TARGET || m_type == TREE); |
|
|
|
|
|
if (includeSelf) { |
|
|
*result++ = this; |
|
|
} |
|
|
|
|
|
Node *ancestor = !(m_parents.empty()) ? m_parents[0] : 0; |
|
|
while (ancestor != 0) { |
|
|
*result++ = ancestor; |
|
|
ancestor = !(ancestor->m_parents.empty()) ? ancestor->m_parents[0] : 0; |
|
|
} |
|
|
} |
|
|
|
|
|
template<typename InputIterator> |
|
|
Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last) |
|
|
{ |
|
|
|
|
|
if (first == last) { |
|
|
return 0; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
InputIterator p = first; |
|
|
Node *lca = *p++; |
|
|
for (; p != last; ++p) { |
|
|
Node *node = *p; |
|
|
assert(node->m_type != SOURCE); |
|
|
if (node != lca) { |
|
|
lca = 0; |
|
|
} |
|
|
} |
|
|
if (lca) { |
|
|
return lca; |
|
|
} |
|
|
|
|
|
|
|
|
size_t minPathLength = 0; |
|
|
std::vector<std::vector<Node *> > paths; |
|
|
for (p = first; p != last; ++p) { |
|
|
paths.resize(paths.size()+1); |
|
|
(*p)->GetTreeAncestors(std::back_inserter(paths.back()), true); |
|
|
size_t pathLength = paths.back().size(); |
|
|
assert(pathLength > 0); |
|
|
if (paths.size() == 1 || pathLength < minPathLength) { |
|
|
minPathLength = pathLength; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < minPathLength; ++i) { |
|
|
bool match = true; |
|
|
for (size_t j = 0; j < paths.size(); ++j) { |
|
|
size_t index = paths[j].size() - minPathLength + i; |
|
|
assert(index >= 0); |
|
|
assert(index < paths[j].size()); |
|
|
if (j == 0) { |
|
|
lca = paths[j][index]; |
|
|
assert(lca); |
|
|
} else if (lca != paths[j][index]) { |
|
|
match = false; |
|
|
break; |
|
|
} |
|
|
} |
|
|
if (match) { |
|
|
return lca; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
assert(false); |
|
|
return 0; |
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
#endif |
|
|
|