// vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "util/exception.hh" #include "moses/TranslationModel/PhraseDictionary.h" #include "moses/TranslationModel/PhraseDictionaryCache.h" #include "moses/FactorCollection.h" #include "moses/InputFileStream.h" #include "moses/StaticData.h" #include "moses/TargetPhrase.h" using namespace std; namespace Moses { std::map< const std::string, PhraseDictionaryCache * > PhraseDictionaryCache::s_instance_map; PhraseDictionaryCache *PhraseDictionaryCache::s_instance = NULL; //! contructor PhraseDictionaryCache::PhraseDictionaryCache(const std::string &line) : PhraseDictionary(line, true) { std::cerr << "Initializing PhraseDictionaryCache feature..." << std::endl; //disabling internal cache (provided by PhraseDictionary) for translation options (third parameter set to 0) m_maxCacheSize = 0; m_entries = 0; m_name = "default"; m_constant = false; ReadParameters(); UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryCache feature named " + m_name + " is allowed"); s_instance_map[m_name] = this; s_instance = this; //for back compatibility vector weight = StaticData::Instance().GetWeights(this); m_numscorecomponent = weight.size(); m_sentences=0; } PhraseDictionaryCache::~PhraseDictionaryCache() { Clear(); } void PhraseDictionaryCache::SetParameter(const std::string& key, const std::string& value) { VERBOSE(2, "PhraseDictionaryCache::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl); if (key == "cache-name") { m_name = Scan(value); } else if (key == "input-factor") { m_inputFactorsVec = Tokenize(value,","); } else if (key == "output-factor") { m_outputFactorsVec = Tokenize(value,","); } else { PhraseDictionary::SetParameter(key, value); } } void PhraseDictionaryCache::CleanUpAfterSentenceProcessing(const InputType& source) { Clear(source.GetTranslationId()); } void PhraseDictionaryCache::InitializeForInput(ttasksptr const& ttask) { #ifdef WITH_THREADS boost::unique_lock lock(m_cacheLock); #endif long tID = ttask->GetSource()->GetTranslationId(); TargetPhraseCollection::shared_ptr tpc; if (m_cacheTM.find(tID) == m_cacheTM.end()) return; for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) { tpc.reset(new TargetPhraseCollection(*(it->second).first)); std::vector::const_iterator it2 = tpc->begin(); while (it2 != tpc->end()) { ((TargetPhrase*) *it2)->EvaluateInIsolation(it->first, GetFeaturesToApply()); it2++; } } if (tpc) { tpc->NthElement(m_tableLimit); // sort the phrases for the decoder } } void PhraseDictionaryCache::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const { #ifdef WITH_THREADS boost::shared_lock read_lock(m_cacheLock); #endif InputPathList::const_iterator iter; for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { InputPath &inputPath = **iter; long tID = inputPath.ttask->GetSource()->GetTranslationId(); if (m_cacheTM.find(tID) == m_cacheTM.end()) continue; const Phrase &source = inputPath.GetPhrase(); TargetPhraseCollection::shared_ptr tpc; for(cacheMap::const_iterator it=m_cacheTM.at(tID).begin(); it != m_cacheTM.at(tID).end(); it++) { if (source.Compare(it->first)!=0) continue; tpc.reset(new TargetPhraseCollection(*(it->second).first)); inputPath.SetTargetPhrases(*this, tpc, NULL); } } } TargetPhraseCollection::shared_ptr PhraseDictionaryCache::GetTargetPhraseCollection(const Phrase &source, long tID) const { #ifdef WITH_THREADS boost::shared_lock read_lock(m_cacheLock); #endif TargetPhraseCollection::shared_ptr tpc; if(m_cacheTM.find(tID) == m_cacheTM.end()) return tpc; cacheMap::const_iterator it = m_cacheTM.at(tID).find(source); if(it != m_cacheTM.at(tID).end()) { tpc.reset(new TargetPhraseCollection(*(it->second).first)); std::vector::const_iterator it2 = tpc->begin(); while (it2 != tpc->end()) { ((TargetPhrase*) *it2)->EvaluateInIsolation(source, GetFeaturesToApply()); it2++; } } if (tpc) { tpc->NthElement(m_tableLimit); // sort the phrases for the decoder } return tpc; } ChartRuleLookupManager* PhraseDictionaryCache::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t /*maxChartSpan*/) { UTIL_THROW(util::Exception, "Not implemented for Chart Decoder"); } // friend ostream& operator<<(ostream& out, const PhraseDictionaryCache& phraseDict) { return out; } void PhraseDictionaryCache::Insert(std::string &entries, long tID) { if (entries != "") { VERBOSE(3,"entries:|" << entries << "|" << " tID | " << tID << std::endl); std::vector elements = TokenizeMultiCharSeparator(entries, "||||"); VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl); Insert(elements, tID); } } void PhraseDictionaryCache::Insert(std::vector entries, long tID) { VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl); Update(tID, entries); IFVERBOSE(3) Print(); } void PhraseDictionaryCache::Update(long tID, std::vector entries) { std::vector pp; std::vector::iterator it; for(it = entries.begin(); it!=entries.end(); it++) { pp.clear(); pp = TokenizeMultiCharSeparator((*it), "|||"); VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl); VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl); if (pp.size() > 3) { VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); VERBOSE(3,"pp[3]:|" << pp[3] << "|" << std::endl); Update(tID,pp[0], pp[1], pp[2], pp[3]); } else if (pp.size() > 2) { VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl); Update(tID,pp[0], pp[1], pp[2]); } else { Update(tID,pp[0], pp[1]); } } } Scores PhraseDictionaryCache::Conv2VecFloats(std::string& s) { std::vector n; if (s.empty()) return n; std::istringstream iss(s); std::copy(std::istream_iterator(iss), std::istream_iterator(), std::back_inserter(n)); return n; } void PhraseDictionaryCache::Update(long tID, std::string sourcePhraseString, std::string targetPhraseString, std::string scoreString, std::string waString) { const StaticData &staticData = StaticData::Instance(); Phrase sourcePhrase(0); TargetPhrase targetPhrase(0); char *err_ind_temp; Scores scores = Conv2VecFloats(scoreString); //target targetPhrase.Clear(); // change here for factored based CBTM VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl); targetPhrase.CreateFromString(Output, m_outputFactorsVec, targetPhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl); //TODO: Would be better to reuse source phrases, but ownership has to be //consistent across phrase table implementations sourcePhrase.Clear(); VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl); sourcePhrase.CreateFromString(Input, m_inputFactorsVec, sourcePhraseString, /*factorDelimiter,*/ NULL); VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl); if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl); Update(tID, sourcePhrase, targetPhrase, scores, waString); } void PhraseDictionaryCache::Update(long tID, Phrase sp, TargetPhrase tp, Scores scores, std::string waString) { VERBOSE(3,"PhraseDictionaryCache::Update(Phrase sp, TargetPhrase tp, Scores scores, std::string waString)" << std::endl); #ifdef WITH_THREADS boost::unique_lock lock(m_cacheLock); #endif VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| word-alignment |" << waString << "|" << std::endl); // if there is no cache for the sentence tID, create one. cacheMap::const_iterator it = m_cacheTM[tID].find(sp); VERBOSE(3,"sp:|" << sp << "|" << std::endl); if(it!=m_cacheTM.at(tID).end()) { VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl); // sp is found TargetCollectionPair TgtCollPair = it->second; TargetPhraseCollection::shared_ptr tpc = TgtCollPair.first; Scores* sc = TgtCollPair.second; const Phrase* p_ptr = NULL; TargetPhrase* tp_ptr = NULL; bool found = false; size_t tp_pos=0; while (!found && tp_pos < tpc->GetSize()) { tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos); p_ptr = (const TargetPhrase*) tp_ptr; if ((Phrase) tp == *p_ptr) { found = true; continue; } tp_pos++; } if (!found) { VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl); std::auto_ptr targetPhrase(new TargetPhrase(tp)); Scores scoreVec; for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); tpc->Add(targetPhrase.release()); tp_pos = tpc->GetSize()-1; sc = &scores; m_entries++; VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl); } else { Scores scoreVec; for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString); VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl); } } else { VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl); // p is not found // create target collection TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection); Scores* sc = new Scores(); m_cacheTM[tID].insert(make_pair(sp,std::make_pair(tpc,sc))); //tp is not found std::auto_ptr targetPhrase(new TargetPhrase(tp)); // scoreVec is a composition of decay_score and the feature scores Scores scoreVec; for (unsigned int i=0; iGetScoreBreakdown().Assign(this, scoreVec); if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString); tpc->Add(targetPhrase.release()); sc = &scores; m_entries++; VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl); } } void PhraseDictionaryCache::Execute(std::string command, long tID) { VERBOSE(2,"command:|" << command << "|" << std::endl); std::vector commands = Tokenize(command, "||"); Execute(commands, tID); } void PhraseDictionaryCache::Execute(std::vector commands, long tID) { for (size_t j=0; jfirst); } } void PhraseDictionaryCache::Clear(long tID) { #ifdef WITH_THREADS boost::unique_lock lock(m_cacheLock); #endif if (m_cacheTM.find(tID) == m_cacheTM.end()) return; cacheMap::iterator it; for(it = m_cacheTM.at(tID).begin(); it!=m_cacheTM.at(tID).end(); it++) { (((*it).second).second)->clear(); delete ((*it).second).second; ((*it).second).first.reset(); } m_cacheTM.at(tID).clear(); m_entries = 0; } void PhraseDictionaryCache::ExecuteDlt(std::map dlt_meta, long tID) { if (dlt_meta.find("cbtm") != dlt_meta.end()) { Insert(dlt_meta["cbtm"], tID); } if (dlt_meta.find("cbtm-command") != dlt_meta.end()) { Execute(dlt_meta["cbtm-command"], tID); } if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) { Clear(); } } void PhraseDictionaryCache::Print() const { VERBOSE(2,"PhraseDictionaryCache::Print()" << std::endl); #ifdef WITH_THREADS boost::shared_lock read_lock(m_cacheLock); #endif for(sentCacheMap::const_iterator itr = m_cacheTM.begin(); itr!=m_cacheTM.end(); itr++) { cacheMap::const_iterator it; for(it = (itr->second).begin(); it!=(itr->second).end(); it++) { std::string source = (it->first).ToString(); TargetPhraseCollection::shared_ptr tpc = (it->second).first; TargetPhraseCollection::iterator itr; for(itr = tpc->begin(); itr != tpc->end(); itr++) { std::string target = (*itr)->ToString(); std::cout << source << " ||| " << target << std::endl; } source.clear(); } } } }// end namespace