File size: 3,545 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
#ifndef moses_Diffs_h
#define moses_Diffs_h
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <vector>
namespace Moses
{

// A single edit operation, encoded as a character:
//   'm'  the current elements of both sequences match,
//   'i'  an element of the second sequence has no counterpart in the first,
//   'd'  an element of the first sequence has no counterpart in the second.
typedef char Diff;

// An edit script: operations listed from the start of the sequences to the end.
typedef std::vector<Diff> Diffs;

// Backtracks through a filled LCS table and appends the resulting edit
// operations to `diffs` in front-to-back order.
//
//   c      table of LCS lengths; c[a][b] refers to the first `a` elements of
//          s1 and the first `b` elements of s2, both counted from `start`
//   s1,s2  the compared sequences
//   start  offset of the table's origin inside both sequences
//   i, j   table cell to backtrack from (normally the bottom-right corner)
//   diffs  output; anything already stored in it is left untouched
//   pred   binary predicate deciding whether two elements match
//
// Despite the historical name this is implemented as a loop: the walk is
// i + j steps long, and one stack frame per step overflows the call stack on
// long sequences.
template <class Sequence, class Pred>
void CreateDiffRec(size_t** c,
                   const Sequence &s1,
                   const Sequence &s2,
                   size_t start,
                   size_t i,
                   size_t j,
                   Diffs& diffs,
                   Pred pred)
{
  const Diffs::size_type firstNew = diffs.size();

  // Walk from (i, j) back to (0, 0); operations are collected back-to-front.
  while(i > 0 || j > 0) {
    if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
      diffs.push_back(Diff('m'));
      --i;
      --j;
    } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
      // Ties go to the insertion branch, so in the final (forward) script a
      // deletion precedes the insertion it is tied with.
      diffs.push_back(Diff('i'));
      --j;
    } else {
      // Only reachable with i > 0 and (j == 0 || c[i][j-1] < c[i-1][j]).
      diffs.push_back(Diff('d'));
      --i;
    }
  }

  // Restore front-to-back order for the part appended by this call only.
  std::reverse(diffs.begin() + static_cast<Diffs::difference_type>(firstNew),
               diffs.end());
}

// Computes an edit script that turns s1 into s2, using `pred` to decide
// whether two elements match.
//
// The common prefix and common suffix are stripped first and reported as
// 'm' operations; the longest-common-subsequence table is built only for the
// remaining middle part.
//
// Returns the script with one entry per 'm'/'i'/'d' operation.
template <class Sequence, class Pred>
Diffs CreateDiff(const Sequence& s1,
                 const Sequence& s2,
                 Pred pred)
{
  Diffs diffs;
  const size_t m = s1.size();
  const size_t n = s2.size();

  // Common prefix: emitted immediately as matches.
  size_t start = 0;
  while(start < m && start < n && pred(s1[start], s2[start])) {
    diffs.push_back(Diff('m'));
    ++start;
  }

  // Common suffix: only measured here, emitted after the middle part.
  // m_stop / n_stop are one-past-the-end indices of the middle part.
  size_t m_stop = m;
  size_t n_stop = n;
  while(m_stop > start && n_stop > start && pred(s1[m_stop - 1], s2[n_stop - 1])) {
    --m_stop;
    --n_stop;
  }

  const size_t m_new = m_stop - start;
  const size_t n_new = n_stop - start;
  const size_t width = n_new + 1;

  // The table lives in vectors so it is released on every exit path, including
  // an exception thrown by pred. CreateDiffRec expects a size_t**, hence the
  // separate vector of row pointers into the zero-initialised cell storage.
  std::vector<size_t> cells((m_new + 1) * width);
  std::vector<size_t*> c(m_new + 1);
  for(size_t i = 0; i <= m_new; ++i)
    c[i] = &cells[i * width];

  // Row 0 and column 0 stay 0; fill the rest with LCS lengths.
  for(size_t i = 1; i <= m_new; ++i) {
    for(size_t j = 1; j <= n_new; ++j) {
      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
        c[i][j] = c[i-1][j-1] + 1;
      else
        c[i][j] = std::max(c[i][j-1], c[i-1][j]);
    }
  }

  CreateDiffRec(&c[0], s1, s2, start, m_new, n_new, diffs, pred);

  // Common suffix.
  diffs.insert(diffs.end(), n - n_stop, Diff('m'));
  return diffs;
}

// Convenience overload: compares elements with operator== via std::equal_to.
template <class Sequence>
Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
{
  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
}

// Diffs s1 against s2, counts the edit operations and adds the counts to
// `stats` as directed by `sig`.
//
// sig[j] selects what is added to stats[j]:
//   'l'  deletions + insertions + substitutions
//   'm'  matches
//   'd'  deletions
//   'i'  insertions
//   's'  substitutions
//   'r'  log(1 - errors / (errors + matches)); when that ratio is not
//        positive, log(1 / (errors + matches + 1)) is added instead
// Any other character leaves stats[j] unchanged.
//
// Throws a `const char*` message if sig and stats differ in size.
template <class Sequence, class Sig, class Stats>
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
{
  if(sig.size() != stats.size())
    throw "Signature size differs from score array size.";

  size_t m = 0, d = 0, i = 0, s = 0;
  Diffs diff = CreateDiff(s1, s2);
  const int len = static_cast<int>(diff.size());

  for(int j = 0; j < len; ++j) {
    if(diff[j] == 'm') {
      m++;
    } else if(diff[j] == 'd') {
      d++;
      // A run of deletions directly followed by a run of insertions is paired
      // up outwards from the boundary between the two runs; every pair is
      // re-counted as one substitution instead of one deletion.
      int k = 0;
      while(j - k >= 0 && j + 1 + k < len &&
            diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
        d--;
        s++;
        k++;
      }
      // Skip the insertions that were consumed as substitutions.
      j += k;
    } else if(diff[j] == 'i') {
      i++;
    }
  }

  const size_t errors = d + i + s;
  const size_t total = errors + m;

  for(size_t j = 0; j < sig.size(); ++j) {
    switch (sig[j]) {
    case 'l':
      stats[j] += errors;
      break;
    case 'm':
      stats[j] += m;
      break;
    case 'd':
      stats[j] += d;
      break;
    case 'i':
      stats[j] += i;
      break;
    case 's':
      stats[j] += s;
      break;
    case 'r': {
      // Braces give macc its own scope, so case labels can be added after
      // this one without jumping past an initialisation.
      float macc = 1;
      if(total)
        macc = static_cast<float>(1.0 - static_cast<float>(errors) / static_cast<float>(total));
      // The logarithm is taken in double precision explicitly, so the result
      // does not depend on which overloads <cmath> puts in the global namespace.
      if(macc > 0)
        stats[j] += std::log(static_cast<double>(macc));
      else
        stats[j] += std::log(1.0 / static_cast<float>(total + 1));
      break;
    }
    }
  }
}

}
#endif
|