File size: 2,987 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
#include "SpanLengthPhraseProperty.h"
#include "moses/Util.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
// Default constructor: the body performs no explicit initialisation.
SpanLengthPhraseProperty::SpanLengthPhraseProperty() {}
void SpanLengthPhraseProperty::ProcessValue(const std::string &value)
{
vector<string> toks;
Tokenize(toks, value);
set< vector<string> > indices;
for (size_t i = 0; i < toks.size(); ++i) {
const string &span = toks[i];
// is it a ntIndex,sourceSpan,targetSpan or count ?
vector<string> toks;
Tokenize<string>(toks, span, ",");
UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
if (toks.size() == 1) {
float count = Scan<float>(toks[0]);
Populate(indices, count);
indices.clear();
} else {
indices.insert(toks);
}
}
// totals
CalcTotals(m_source);
CalcTotals(m_target);
}
void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, float count)
{
set< vector<string> >::const_iterator iter;
for (iter = indices.begin(); iter != indices.end(); ++iter) {
const vector<string> &toksStr = *iter;
vector<size_t> toks = Scan<size_t>(toksStr);
UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
Populate(toks, count);
}
}
// Records one (non-terminal index, source length, target length) triple.
//
// toks[0] selects the non-terminal slot, toks[1] is the source length and
// toks[2] the target length. The count is added to the source-length map and
// to the target-length map of that slot. No size check is done on toks here.
void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float count)
{
  const size_t slot = toks[0];

  // Grow both tables together so the slot exists in each of them.
  if (!(slot < m_source.size())) {
    m_source.resize(slot + 1);
    m_target.resize(slot + 1);
  }

  Populate(m_source[slot].first, toks[1], count);
  Populate(m_target[slot].first, toks[2], count);
}
// Adds count to the entry stored under span, creating the entry with value
// count when span is not yet present.
void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count)
{
  // Try to insert a fresh entry; the flag tells us whether the key was new.
  std::pair<Map::iterator, bool> inserted = map.insert(Map::value_type(span, count));
  if (!inserted.second) {
    // key already existed: accumulate onto the stored value
    inserted.first->second += count;
  }
}
// For every element of vec, sums all values held in its map (.first) and
// stores the sum in its total (.second), overwriting any previous total.
void SpanLengthPhraseProperty::CalcTotals(Vec &vec)
{
  for (Vec::iterator slot = vec.begin(); slot != vec.end(); ++slot) {
    const Map &counts = slot->first;

    float sum = 0;
    for (Map::const_iterator entry = counts.begin(); entry != counts.end(); ++entry) {
      sum += entry->second;
    }

    slot->second = sum;
  }
}
// Returns a smoothed relative frequency of a source span width for one
// non-terminal.
//
// ntInd        index of the non-terminal slot in the source table
// sourceWidth  source span width to look up
// smoothing    constant added to the looked-up count and, once per distinct
//              stored width, to the denominator
//
// Result: (count(sourceWidth) + smoothing) / (total + smoothing * #widths),
// where count is 0 for a width that was never stored. Returns 1.0f when there
// is no data for the slot, i.e. the index is outside the table or its map is
// empty.
float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const
{
  // The table only grows up to the largest index that was populated, so an
  // index beyond it would read out of bounds. Treat it like an empty slot.
  if (ntInd >= m_source.size()) {
    return 1.0f;
  }

  const std::pair<Map, float> &data = m_source[ntInd];
  const Map &map = data.first;

  if (map.empty()) {
    // should this ever be reached? there shouldn't be any span length property so FF shouldn't call this
    return 1.0f;
  }

  // unseen widths contribute a raw count of zero before smoothing
  Map::const_iterator iter = map.find(sourceWidth);
  float count = (iter == map.end()) ? 0.0f : iter->second;
  count += smoothing;

  // data.second is the cached sum of all counts (set by CalcTotals)
  const float total = data.second + smoothing * static_cast<float>(map.size());

  return count / total;
}
}
|