|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef moses_LanguageModelReloading_h |
|
#define moses_LanguageModelReloading_h |
|
|
|
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

#include "moses/LM/Base.h"
#include "moses/LM/Ken.h"

#include "util/tokenize_piece.hh"
#include "util/string_stream.hh"
|
namespace Moses |
|
{ |
|
|
|
class FFState; |
|
/**
 * KenLM-backed language model whose contents can be replaced for each input.
 *
 * The model is first loaded from `file` by the LanguageModelKen constructor.
 * On every InitializeForInput() call, the string stored in the task's context
 * scope under this object's address is written to that same file and the
 * model is loaded again from it.
 */
template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
{
public:

  /**
   * @param line       feature line forwarded to LanguageModelKen
   * @param file       path the model is loaded from; overwritten on each reload
   * @param factorType factor forwarded to LanguageModelKen
   * @param lazy       true selects util::LAZY, false util::POPULATE_OR_READ
   */
  ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
    VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl);
  }

  /**
   * Replace the loaded model with the one attached to `ttask`.
   *
   * Looks up a std::string in the task's context scope (keyed by `this`),
   * writes it line by line to m_file and reloads the model from that file.
   * If no scope or no string is available, or the file cannot be written,
   * a message is logged and the currently loaded model is left untouched.
   *
   * @param ttask translation task whose context scope carries the model text
   */
  virtual void InitializeForInput(ttasksptr const& ttask) {
    VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl);

    boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();
    if (!contextScope) {
      VERBOSE(1, "ReloadingLM: no context scope, keeping current model" << std::endl);
      return;
    }

    // This object's address is the lookup key for the model text.
    void const* key = static_cast<void const*>(this);

    // Guard against an empty pointer: it is dereferenced below.
    boost::shared_ptr<std::string> value = contextScope->get<std::string>(key);
    if (!value) {
      VERBOSE(1, "ReloadingLM: no model in context scope, keeping current model" << std::endl);
      return;
    }

    std::ofstream tmp(m_file.c_str());
    if (!tmp) {
      VERBOSE(1, "ReloadingLM: cannot open " << m_file << " for writing, keeping current model" << std::endl);
      return;
    }

    // Copy line by line so that the last line always ends with a newline.
    std::istringstream strme(*value);
    std::string line;
    while (std::getline(strme, line)) {
      tmp << line << "\n";
    }

    tmp.close();
    if (!tmp) {
      // A failed write leaves an incomplete file; do not load from it.
      VERBOSE(1, "ReloadingLM: error writing " << m_file << ", keeping current model" << std::endl);
      return;
    }

    LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
  }

protected:

  using LanguageModelKen<Model>::m_ngram;
  using LanguageModelKen<Model>::m_lmIdLookup;
  using LanguageModelKen<Model>::m_beginSentenceFactor;

  // Path of the model file; rewritten by InitializeForInput().
  const std::string m_file;
  // Load-method selector handed to LoadModel() (see constructor).
  bool m_lazy;
};
|
|
|
|
|
/**
 * Create a ReloadingLanguageModel matching the KenLM data structure in `file`.
 *
 * Declared inline because it is defined in a header: without it, every
 * translation unit including this header would emit its own definition.
 *
 * @param line       feature line forwarded to the model constructor
 * @param file       path of the model file
 * @param factorType factor forwarded to the model constructor
 * @param lazy       forwarded to the model constructor
 * @return newly allocated model; the caller takes ownership
 * @throws via UTIL_THROW2 when the binary file reports an unknown model type
 */
inline LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
  lm::ngram::ModelType model_type;
  if (!lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
    // Not recognized as a binary model: fall back to the probing model.
    return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
  }

  switch (model_type) {
  case lm::ngram::PROBING:
    return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
  case lm::ngram::REST_PROBING:
    return new ReloadingLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
  case lm::ngram::TRIE:
    return new ReloadingLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
  case lm::ngram::QUANT_TRIE:
    return new ReloadingLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
  case lm::ngram::ARRAY_TRIE:
    return new ReloadingLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
  case lm::ngram::QUANT_ARRAY_TRIE:
    return new ReloadingLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
  default:
    UTIL_THROW2("Unrecognized kenlm model type " << model_type);
  }
}
|
|
|
/**
 * Create a ReloadingLanguageModel from a space-separated feature line.
 *
 * The first token (the feature name) is skipped. Every following token must
 * have the form key=value:
 *   - factor=N   factor type passed to the model
 *   - order=N    accepted and ignored
 *   - path=FILE  model file (required)
 *   - lazyken=B  lazy flag passed to the model
 * All other key=value pairs are appended to a "KENLM ..." line that is
 * forwarded to the model constructor.
 *
 * Declared inline because it is defined in a header.
 *
 * @param lineOrig feature line as described above
 * @return newly allocated model; the caller takes ownership
 * @throws via UTIL_THROW_IF2 when a token has no '=' or no path is given
 */
inline LanguageModel *ConstructReloadingLM(const std::string &lineOrig)
{
  FactorType factorType = 0;
  std::string filePath;
  bool lazy = false;

  util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
  // Skip the feature name, but never advance an already exhausted iterator.
  if (argument) {
    ++argument;
  }

  util::StringStream line;
  line << "KENLM";

  for (; argument; ++argument) {
    const char *const begin = argument->data();
    const char *const end = begin + argument->size();
    const char *equals = std::find(begin, end, '=');
    UTIL_THROW_IF2(equals == end,
                   "Expected = in ReloadingLM argument " << *argument);
    StringPiece name(begin, equals - begin);
    StringPiece value(equals + 1, end - equals - 1);
    if (name == "factor") {
      factorType = boost::lexical_cast<FactorType>(value);
    } else if (name == "order") {
      // Ignored.
    } else if (name == "path") {
      filePath.assign(value.data(), value.size());
    } else if (name == "lazyken") {
      lazy = boost::lexical_cast<bool>(value);
    } else {
      // Unknown here: pass it through to the underlying model.
      line << " " << name << "=" << value;
    }
  }

  // Fail with a clear message instead of trying to open an empty file name.
  UTIL_THROW_IF2(filePath.empty(),
                 "Expected path= in ReloadingLM line " << lineOrig);

  return ConstructReloadingLM(line.str(), filePath, factorType, lazy);
}
|
|
|
|
|
} |
|
|
|
#endif |
|
|
|
|