File size: 1,634 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
// memscore - in-memory phrase scoring for Statistical Machine Translation
// Christian Hardmeier, FBK-irst, Trento, 2010
// $Id$
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

#include <n_gram.h>
#include <lmtable.h>

#include "phrasetable.h"
#include "phraselm.h"
// Binds this scorer to the given phrase list.
//
// The address of pilist is remembered in phrase_info_list_ (so pilist must
// outlive any later use of that pointer), and the index returned by
// pilist.register_data(1) is kept in score_idx_; that index is what
// compute_lmscores() uses to address each phrase's score.
void PhraseLanguageModel::attach(PhraseInfoList &pilist)
{
    PhraseInfoList *const list = &pilist;

    phrase_info_list_ = list;
    // NOTE(review): the argument 1 presumably requests a single data slot
    // per phrase -- confirm against PhraseInfoList::register_data.
    score_idx_ = list->register_data(1);
}
// Scores the attached phrase list without normalisation: each phrase keeps
// the raw language model score computed by compute_lmscores().
// Dereferences phrase_info_list_, which is set by attach().
void PhraseLanguageModel::compute_statistic()
{
    const bool closed_world = false;
    compute_lmscores(*phrase_info_list_, closed_world);
}
// Scores every phrase in phrase_info_list with the language model read from
// lmfile_ and stores the result in each phrase's data slot score_idx_.
//
// For each phrase, its words are pushed one at a time into a fresh n-gram
// context and the values returned by lmtable::clprob() are summed. The sum is
// treated as a base-10 logarithm and converted to the linear domain.
//
// phrase_info_list  phrases to score; their score_idx_ slot is overwritten.
// closed_world      if true, every score is afterwards divided by the sum of
//                   all scores computed in this call.
//
// Throws std::runtime_error if the language model file cannot be opened.
// May run at most once per object: asserts that computation_done_ is still
// false on entry and sets it to true on completion.
void PhraseLanguageModel::compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world)
{
    // Check the precondition before paying for the language model load.
    assert(!computation_done_);

    lmtable lm;
    std::ifstream lmstream(lmfile_.c_str());
    if(!lmstream) {
        // Fail loudly instead of handing an unusable stream to lm.load().
        throw std::runtime_error(std::string("cannot open language model file: ") + lmfile_.c_str());
    }
    lm.load(lmstream, lmfile_.c_str(), NULL, 0);
    // NOTE(review): presumably makes out-of-vocabulary words extremely
    // improbable -- confirm the exact semantics in the IRSTLM documentation.
    lm.setlogOOVpenalty(10000000);

    Score marginal_score = 0;
    for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) {
        PhraseInfo &pi = *it;

        // A new n-gram per phrase, so no context carries over between phrases.
        ngram ng(lm.getDict());
        Score lmscore = 0;
        for(PhraseText::const_string_iterator word = pi.get_phrase().string_begin();
                word != pi.get_phrase().string_end(); ++word) {
            ng.pushw(word->c_str());
            lmscore += lm.clprob(ng);
        }

        // std::pow(10.0, x) is the portable spelling of the GNU-only exp10(x).
        pi.data(score_idx_) = std::pow(10.0, lmscore);
        marginal_score += pi.data(score_idx_);
    }

    if(closed_world) {
        // NOTE(review): if every score above is zero, this divides by zero;
        // confirm that callers cannot reach that state.
        for(PhraseInfoList::iterator it = phrase_info_list.begin(); it != phrase_info_list.end(); ++it) {
            PhraseInfo &pi = *it;
            pi.data(score_idx_) /= marginal_score;
        }
    }

    computation_done_ = true;
}
// Scores the attached phrase list in closed-world mode: compute_lmscores()
// divides every phrase score by the sum of all phrase scores.
// Dereferences phrase_info_list_, so a phrase list must already be attached.
void ClosedPhraseLanguageModel::compute_statistic()
{
    const bool closed_world = true;
    compute_lmscores(*phrase_info_list_, closed_world);
}
|