File size: 3,420 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
#include "moses/PP/NonTermContextProperty.h"
#include <string>
#include <cassert>
#include "moses/Util.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses
{
/// Construct an empty property; nothing is initialised explicitly here.
/// The per-non-terminal stores are filled in later by ProcessValue().
NonTermContextProperty::NonTermContextProperty() {}
/// Destructor. Performs no explicit cleanup.
NonTermContextProperty::~NonTermContextProperty()
{
  // m_probStores holds ProbStore objects by value (elements are bound to
  // `ProbStore &` elsewhere in this file), so there is nothing to delete by
  // hand; an earlier RemoveAllInColl(m_probStores) call is deliberately
  // left disabled.
}
void NonTermContextProperty::ProcessValue(const std::string &value)
{
vector<string> toks;
Tokenize(toks, value);
FactorCollection &fc = FactorCollection::Instance();
size_t numNT = Scan<size_t>(toks[0]);
m_probStores.resize(numNT);
size_t ind = 1;
while (ind < toks.size()) {
vector<const Factor *> factors;
for (size_t nt = 0; nt < numNT; ++nt) {
size_t ntInd = Scan<size_t>(toks[ind]);
assert(nt == ntInd);
++ind;
for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
//cerr << "toks[" << ind << "]=" << toks[ind] << endl;
const Factor *factor = fc.AddFactor(toks[ind], false);
factors.push_back(factor);
++ind;
}
}
// done with the context. Just get the count and put it all into data structures
// cerr << "count=" << toks[ind] << endl;
float count = Scan<float>(toks[ind]);
++ind;
for (size_t i = 0; i < factors.size(); ++i) {
size_t ntInd = i / 4;
size_t contextInd = i % 4;
const Factor *factor = factors[i];
AddToMap(ntInd, contextInd, factor, count);
}
}
}
/**
 * Add `count` for `factor` to context slot `index` of the store belonging to
 * non-terminal `ntIndex`, creating that store first if it does not exist yet.
 *
 * @param ntIndex  non-terminal position; selects the ProbStore.
 * @param index    context slot inside the store.
 * @param factor   context word.
 * @param count    amount to add.
 */
void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
{
  // Grow only. Resizing when ntIndex is already in range would truncate the
  // vector to ntIndex + 1 entries and silently discard the stores (and their
  // accumulated counts) of every higher non-terminal index.
  if (ntIndex >= m_probStores.size()) {
    m_probStores.resize(ntIndex + 1);
  }

  ProbStore &probStore = m_probStores[ntIndex];
  probStore.AddToMap(index, factor, count);
}
/**
 * Look up the smoothed probability of `factor` in context slot `contextInd`
 * of non-terminal `ntInd`.
 *
 * Raises an error through UTIL_THROW_IF2 when `ntInd` does not refer to an
 * existing store; otherwise the work is delegated to ProbStore::GetProb.
 */
float NonTermContextProperty::GetProb(size_t ntInd,
                                      size_t contextInd,
                                      const Factor *factor,
                                      float smoothConstant) const
{
  UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);

  return m_probStores[ntInd].GetProb(contextInd, factor, smoothConstant);
}
//////////////////////////////////////////
void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
{
Map &map = m_vec[index];
Map::iterator iter = map.find(factor);
if (iter == map.end()) {
map[factor] = count;
} else {
float &currCount = iter->second;
currCount += count;
}
m_totalCount += count;
}
/**
 * Smoothed relative frequency of `factor` in context slot `contextInd`:
 * GetCount(...) divided by GetTotalCount(...).
 *
 * The denominator is not checked for zero here.
 */
float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
    const Factor *factor,
    float smoothConstant) const
{
  const float numerator = GetCount(contextInd, factor, smoothConstant);
  const float denominator = GetTotalCount(contextInd, smoothConstant);
  return numerator / denominator;
}
float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
const Factor *factor,
float smoothConstant) const
{
const Map &map = m_vec[contextInd];
float count = smoothConstant;
Map::const_iterator iter = map.find(factor);
if (iter == map.end()) {
// nothing
} else {
count += iter->second;
}
return count;
}
float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
{
const Map &map = m_vec[contextInd];
return m_totalCount + smoothConstant * map.size();
}
} // namespace Moses
|