File size: 2,794 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
 * ConsistentPhrases.cpp
 *
 *  Created on: 20 Feb 2014
 *      Author: hieu
 */
#include <sstream>
#include <cassert>
#include "ConsistentPhrases.h"
#include "NonTerm.h"
#include "Parameter.h"
#include "moses/Util.h"

using namespace std;

// Default constructor: performs no work of its own. The per-span storage in
// m_coll is sized later by Initialize().
ConsistentPhrases::ConsistentPhrases()
{
}

/**
 * Destructor: passes every collection held in m_coll to
 * Moses::RemoveAllInColl().
 *
 * NOTE(review): RemoveAllInColl presumably deletes the ConsistentPhrase
 * objects allocated in Add() and empties the collection - confirm against
 * moses/Util.h.
 */
ConsistentPhrases::~ConsistentPhrases()
{
  for (size_t row = 0; row < m_coll.size(); ++row) {
    std::vector<Coll> &cells = m_coll[row];

    std::vector<Coll>::iterator cell;
    for (cell = cells.begin(); cell != cells.end(); ++cell) {
      Moses::RemoveAllInColl(*cell);
    }
  }
}

/**
 * Size the two-level storage for `size` start positions.
 *
 * m_coll gets `size` rows, and row i is resized to hold (size - i)
 * collections. Add() and GetColl() index a row with
 * (sourceEnd - sourceStart), so each row has one slot per such offset.
 *
 * @param size  number of rows to allocate
 */
void ConsistentPhrases::Initialize(size_t size)
{
  m_coll.resize(size);

  size_t row = 0;
  while (row < size) {
    // Rows shrink by one slot for each step to the right.
    m_coll[row].resize(size - row);
    ++row;
  }
}

void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
                            int targetStart, int targetEnd,
                            const Parameter &params)
{
  Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
  ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
      targetStart, targetEnd,
      params);

  assert(coll.find(cp) == coll.end());
  coll.insert(cp);
}

/**
 * Read-only access to the collection stored for a source span.
 *
 * @param sourceStart  row index into m_coll
 * @param sourceEnd    the slot within the row is (sourceEnd - sourceStart)
 * @return const reference to the collection in that slot
 *
 * Both lookups use operator[]; no range check is performed here.
 */
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}

/**
 * Mutable access to the collection stored for a source span.
 *
 * @param sourceStart  row index into m_coll
 * @param sourceEnd    the slot within the row is (sourceEnd - sourceStart)
 * @return reference to the collection in that slot
 *
 * Both lookups use operator[]; no range check is performed here.
 */
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}

std::string ConsistentPhrases::Debug() const
{
  std::stringstream out;
  for (size_t start = 0; start < m_coll.size(); ++start) {
    const std::vector<Coll> &allSourceStart = m_coll[start];

    for (size_t size = 0; size < allSourceStart.size(); ++size) {
      const Coll &coll = allSourceStart[size];

      Coll::const_iterator iter;
      for (iter = coll.begin(); iter != coll.end(); ++iter) {
        const ConsistentPhrase &consistentPhrase = **iter;
        out << consistentPhrase.Debug() << endl;
      }
    }
  }

  return out.str();
}

void ConsistentPhrases::AddHieroNonTerms(const Parameter &params)
{
  // add [X] labels everywhere
  for (size_t i = 0; i < m_coll.size(); ++i) {
    vector<Coll> &inner = m_coll[i];
    for (size_t j = 0; j < inner.size(); ++j) {
      ConsistentPhrases::Coll &coll = inner[j];
      ConsistentPhrases::Coll::iterator iter;
      for (iter = coll.begin(); iter != coll.end(); ++iter) {
        ConsistentPhrase &cp = **iter;
        cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
      }
    }
  }
}