// $Id$ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef moses_LanguageModelReloading_h #define moses_LanguageModelReloading_h #include #include "moses/LM/Base.h" #include "moses/LM/Ken.h" #include "util/tokenize_piece.hh" #include "util/string_stream.hh" #include namespace Moses { class FFState; template class ReloadingLanguageModel : public LanguageModelKen { public: ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) { VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl); } virtual void InitializeForInput(ttasksptr const& ttask) { VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl); // The context scope object for this translation task // contains a map of translation task-specific data boost::shared_ptr contextScope = ttask->GetScope(); // The key to the map is this object void const* key = static_cast(this); // The value stored in the map is a string representing a phrase table boost::shared_ptr value = contextScope->get(key); // Create a stream to read the phrase table data stringstream strme(*(value.get())); ofstream tmp; tmp.open(m_file.c_str()); // Read the phrase table data, one line at a time string line; while (getline(strme, line)) { tmp << line << "\n"; } tmp.close(); LanguageModelKen::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ); }; protected: using LanguageModelKen::m_ngram; using LanguageModelKen::m_lmIdLookup; using LanguageModelKen::m_beginSentenceFactor; const std::string m_file; bool m_lazy; }; LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy) { lm::ngram::ModelType model_type; if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) { switch(model_type) { case lm::ngram::PROBING: return new ReloadingLanguageModel(line, file, factorType, lazy); case lm::ngram::REST_PROBING: return new ReloadingLanguageModel(line, file, factorType, lazy); case lm::ngram::TRIE: return new ReloadingLanguageModel(line, file, factorType, lazy); case lm::ngram::QUANT_TRIE: return new ReloadingLanguageModel(line, file, factorType, lazy); case lm::ngram::ARRAY_TRIE: return new ReloadingLanguageModel(line, file, factorType, lazy); case lm::ngram::QUANT_ARRAY_TRIE: return new ReloadingLanguageModel(line, file, factorType, lazy); default: UTIL_THROW2("Unrecognized kenlm model type " << model_type); } } else { return new ReloadingLanguageModel(line, file, factorType, lazy); } } LanguageModel *ConstructReloadingLM(const std::string &lineOrig) { FactorType factorType = 0; std::string filePath; bool lazy = false; util::TokenIter argument(lineOrig, ' '); ++argument; // KENLM util::StringStream line; line << "KENLM"; for (; argument; ++argument) { const char *equals = std::find(argument->data(), argument->data() + argument->size(), '='); UTIL_THROW_IF2(equals == argument->data() + argument->size(), "Expected = in ReloadingLM argument " << *argument); StringPiece name(argument->data(), equals - argument->data()); StringPiece value(equals + 1, argument->data() + argument->size() - equals - 1); if (name == "factor") { factorType = boost::lexical_cast(value); } else if (name == "order") { // Ignored } else if (name == "path") { filePath.assign(value.data(), value.size()); } else if (name == "lazyken") { lazy = boost::lexical_cast(value); } else { // pass to base class to interpret line << " " << name << "=" << value; } } return ConstructReloadingLM(line.str(), filePath, factorType, lazy); } } // namespace Moses #endif