|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <limits> |
|
#include <iostream> |
|
#include <fstream> |
|
|
|
#include "MaxEntSRI.h" |
|
#include "moses/TypeDef.h" |
|
#include "moses/Util.h" |
|
#include "moses/FactorCollection.h" |
|
#include "moses/Phrase.h" |
|
#include "moses/StaticData.h" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __APPLE__ |
|
#define HAVE_ZOPEN |
|
#endif |
|
|
|
#include "Vocab.h" |
|
#include "MEModel.h" |
|
|
|
using namespace std; |
|
|
|
namespace Moses |
|
{ |
|
// Constructs the feature from its configuration line.
// The SRILM vocabulary and model pointers start out null; they are only
// allocated in Load(), so destroying an unloaded object deletes null pointers.
LanguageModelMaxEntSRI::LanguageModelMaxEntSRI(const std::string &line)
  : LanguageModelSingleFactor(line),
    m_srilmVocab(NULL),
    m_srilmModel(NULL)
{
  ReadParameters();
}
|
|
|
// Releases the SRILM objects allocated in Load().
LanguageModelMaxEntSRI::~LanguageModelMaxEntSRI()
{
  // The model was constructed from *m_srilmVocab (see Load), so it is
  // destroyed first, while the vocabulary object still exists.
  delete m_srilmModel;
  delete m_srilmVocab;
}
|
|
|
// Allocates the SRILM vocabulary and MaxEnt model, reads the model from
// m_filePath, and builds the Moses-factor-id -> SRILM-vocab-index lookup table.
//
// opts is not used by this implementation.
void LanguageModelMaxEntSRI::Load(AllOptions::ptr const& opts)
{
  m_srilmVocab = new ::Vocab();
  m_srilmModel = new MEModel(*m_srilmVocab, m_nGramOrder);

  m_srilmModel->skipOOVs() = false;

  // NOTE(review): the result of read() is not inspected here, matching the
  // previous behaviour - confirm whether a failed read should abort loading.
  File file( m_filePath.c_str(), "r" );
  m_srilmModel->read(file);

  // The unknown-word index must be known *before* CreateFactors() runs:
  // CreateFactors() uses m_unknownId both as the default entry for every slot
  // of m_lmIdLookup and as the fallback result of GetLmID() for BOS_/EOS_.
  // Assigning it afterwards would fill the table with an unset value.
  m_unknownId = m_srilmVocab->unkIndex();
  CreateFactors();
}
|
|
|
void LanguageModelMaxEntSRI::CreateFactors() |
|
{ |
|
|
|
FactorCollection &factorCollection = FactorCollection::Instance(); |
|
|
|
std::map<size_t, VocabIndex> lmIdMap; |
|
size_t maxFactorId = 0; |
|
|
|
VocabString str; |
|
VocabIter iter(*m_srilmVocab); |
|
while ( (str = iter.next()) != NULL) { |
|
VocabIndex lmId = GetLmID(str); |
|
size_t factorId = factorCollection.AddFactor(Output, m_factorType, str)->GetId(); |
|
lmIdMap[factorId] = lmId; |
|
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
|
} |
|
|
|
size_t factorId; |
|
|
|
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_); |
|
factorId = m_sentenceStart->GetId(); |
|
lmIdMap[factorId] = GetLmID(BOS_); |
|
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
|
m_sentenceStartWord[m_factorType] = m_sentenceStart; |
|
|
|
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_); |
|
factorId = m_sentenceEnd->GetId(); |
|
lmIdMap[factorId] = GetLmID(EOS_); |
|
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId; |
|
m_sentenceEndWord[m_factorType] = m_sentenceEnd; |
|
|
|
|
|
m_lmIdLookup.resize(maxFactorId+1); |
|
|
|
fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId); |
|
|
|
map<size_t, VocabIndex>::iterator iterMap; |
|
for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) { |
|
m_lmIdLookup[iterMap->first] = iterMap->second; |
|
} |
|
} |
|
|
|
// Looks a surface string up in the SRILM vocabulary.
// m_unknownId is passed to getIndex() as the value to use for a string that
// is not in the vocabulary.
VocabIndex LanguageModelMaxEntSRI::GetLmID( const std::string &str ) const
{
  const char *word = str.c_str();
  return m_srilmVocab->getIndex(word, m_unknownId);
}
|
// Maps a Moses factor to its SRILM vocabulary index via m_lmIdLookup.
// Factor ids beyond the end of the table yield m_unknownId.
VocabIndex LanguageModelMaxEntSRI::GetLmID( const Factor *factor ) const
{
  const size_t factorId = factor->GetId();
  if (factorId < m_lmIdLookup.size()) {
    return m_lmIdLookup[factorId];
  }
  return m_unknownId;
}
|
|
|
// Scores a single word given an SRILM context array.
//
// wordId  - SRILM index of the word being predicted.
// context - context passed unchanged to the model's wordProb().
//
// The returned score is the model's wordProb() value passed through
// TransformLMScore() and then FloorScore(); `unknown` is set when wordId is
// the unknown-word index.
LMResult LanguageModelMaxEntSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
{
  const float modelLogProb = m_srilmModel->wordProb(wordId, context);
  const float transformed = TransformLMScore(modelLogProb);

  LMResult result;
  result.score = FloorScore(transformed);
  result.unknown = (wordId == m_unknownId);
  return result;
}
|
|
|
// Scores the last word of contextFactor given the words that precede it.
//
// contextFactor - n-gram in sentence order; the final element is the word
//                 being scored, the earlier elements are its context.
// finalState    - optional out-parameter. When non-null it receives the
//                 model's contextID() for the full n-gram (or NULL when
//                 contextFactor is empty).
//
// An empty contextFactor yields score 0.0 and unknown == false.
// Throws (via UTIL_THROW_IF2) when the last word has no factor of this
// model's factor type.
LMResult LanguageModelMaxEntSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
  LMResult ret;
  FactorType factorType = GetFactorType();
  size_t count = contextFactor.size();

  if (count == 0) {
    if (finalState != NULL) {
      *finalState = NULL;
    }
    ret.score = 0.0;
    ret.unknown = false;
    return ret;
  }

  // Buffer layout:
  //   ngram[0]            - reserved for the scored word (only filled in when
  //                         a final state is requested)
  //   ngram[1 .. count-1] - context words in reverse order, i.e. the word
  //                         immediately before the scored word comes first
  //   ngram[count]        - Vocab_None terminator
  VocabIndex ngram[count + 1];
  for (size_t pos = 1; pos < count; ++pos) {
    ngram[pos] = GetLmID((*contextFactor[count - 1 - pos])[factorType]);
  }
  ngram[count] = Vocab_None;

  const Factor *lastFactor = (*contextFactor[count - 1])[factorType];
  UTIL_THROW_IF2(lastFactor == NULL,
                 "No factor " << factorType << " at position " << (count-1));

  // Score the last word against the reversed context that starts at ngram[1].
  VocabIndex lmId = GetLmID(lastFactor);
  ret = GetValue(lmId, ngram + 1);

  if (finalState != NULL) {
    // Prepend the scored word so the state covers the whole n-gram.
    ngram[0] = lmId;
    unsigned int unusedLength;
    *finalState = m_srilmModel->contextID(ngram, unusedLength);
  }
  return ret;
}
|
|
|
} |
|
|
|
|
|
|
|
|