|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <queue> |
|
#include "moses/TranslationModel/PhraseDictionary.h" |
|
#include "moses/StaticData.h" |
|
#include "moses/InputType.h" |
|
#include "moses/TranslationOption.h" |
|
#include "moses/DecodeStep.h" |
|
#include "moses/DecodeGraph.h" |
|
#include "moses/InputPath.h" |
|
#include "util/exception.hh" |
|
|
|
using namespace std; |
|
|
|
namespace Moses |
|
{ |
|
std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl; |
|
|
|
PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow) |
|
: DecodeFeature(line, registerNow) |
|
, m_tableLimit(20) |
|
, m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE) |
|
{ |
|
m_id = s_staticColl.size(); |
|
s_staticColl.push_back(this); |
|
} |
|
|
|
bool |
|
PhraseDictionary:: |
|
ProvidesPrefixCheck() const |
|
{ |
|
return false; |
|
} |
|
|
|
/**
 * Look up the target phrases for a source phrase, going through the cache
 * returned by GetCache() whenever m_maxCacheSize is non-zero.
 *
 * On a cache miss the result of GetTargetPhraseCollectionNonCacheLEGACY() is
 * copied (when non-null) and the copy is stored together with the current
 * clock() value. On a hit the stored timestamp is refreshed and the stored
 * collection is returned.
 *
 * NOTE(review): entries are keyed only by hash_value(src); two phrases that
 * hash to the same value would share one cache entry.
 *
 * @param src source phrase to translate.
 * @return the (possibly null) target phrase collection.
 */
TargetPhraseCollection::shared_ptr
PhraseDictionary::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
  // A zero cache limit means caching is switched off: query directly.
  if (!m_maxCacheSize) {
    return GetTargetPhraseCollectionNonCacheLEGACY(src);
  }

  typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;

  CacheColl &cache = GetCache();
  const size_t hash = hash_value(src);
  const CacheColl::iterator hit = cache.find(hash);

  if (hit != cache.end()) {
    // Already cached: mark as recently used and hand back the stored result.
    hit->second.second = clock();
    return hit->second.first;
  }

  // Not cached yet: fetch it, keep a private copy, remember it.
  TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
  if (ret) {
    ret.reset(new TargetPhraseCollection(*ret));
  }
  cache[hash] = entry(ret, clock());
  return ret;
}
|
|
|
/**
 * Uncached lookup of the target phrases for a source phrase.
 *
 * This base implementation has no lookup of its own and always raises
 * util::Exception via UTIL_THROW.
 *
 * @param src source phrase (unused here).
 */
TargetPhraseCollection::shared_ptr
PhraseDictionary::GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
|
|
|
|
|
/**
 * Range-based legacy lookup over an input.
 *
 * This base implementation has no lookup of its own and always raises
 * util::Exception via UTIL_THROW.
 *
 * @param src   input being translated (unused here).
 * @param range span of the input to look up (unused here).
 */
TargetPhraseCollectionWithSourcePhrase::shared_ptr
PhraseDictionary::GetTargetPhraseCollectionLEGACY(InputType const& src,Range const& range) const
{
  UTIL_THROW(util::Exception, "Legacy method not implemented");
}
|
|
|
void |
|
PhraseDictionary:: |
|
SetParameter(const std::string& key, const std::string& value) |
|
{ |
|
if (key == "cache-size") { |
|
m_maxCacheSize = Scan<size_t>(value); |
|
} else if (key == "path") { |
|
m_filePath = value; |
|
} else if (key == "table-limit") { |
|
m_tableLimit = Scan<size_t>(value); |
|
} else { |
|
DecodeFeature::SetParameter(key, value); |
|
} |
|
} |
|
|
|
void |
|
PhraseDictionary:: |
|
SetFeaturesToApply() |
|
{ |
|
|
|
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions(); |
|
for (size_t i = 0; i < allFeatures.size(); ++i) { |
|
FeatureFunction *feature = allFeatures[i]; |
|
if (feature->IsUseable(m_outputFactors)) { |
|
m_featuresToApply.push_back(feature); |
|
} |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool |
|
PhraseDictionary:: |
|
PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const |
|
{ |
|
return true; |
|
} |
|
|
|
void |
|
PhraseDictionary:: |
|
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const |
|
{ |
|
InputPathList::const_iterator iter; |
|
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) { |
|
InputPath &inputPath = **iter; |
|
|
|
|
|
if (!SatisfyBackoff(inputPath)) { |
|
continue; |
|
} |
|
|
|
const Phrase &phrase = inputPath.GetPhrase(); |
|
TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase); |
|
inputPath.SetTargetPhrases(*this, targetPhrases, NULL); |
|
} |
|
} |
|
|
|
|
|
void PhraseDictionary::ReduceCache() const |
|
{ |
|
Timer reduceCacheTime; |
|
reduceCacheTime.start(); |
|
CacheColl &cache = GetCache(); |
|
if (cache.size() <= m_maxCacheSize) return; |
|
|
|
|
|
priority_queue< clock_t > lastUsedTimes; |
|
CacheColl::iterator iter; |
|
iter = cache.begin(); |
|
while( iter != cache.end() ) { |
|
lastUsedTimes.push( iter->second.second ); |
|
iter++; |
|
} |
|
for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ ) |
|
lastUsedTimes.pop(); |
|
clock_t cutoffLastUsedTime = lastUsedTimes.top(); |
|
|
|
|
|
iter = cache.begin(); |
|
while( iter != cache.end() ) { |
|
if (iter->second.second < cutoffLastUsedTime) { |
|
CacheColl::iterator iterRemove = iter++; |
|
|
|
cache.erase(iterRemove); |
|
} else iter++; |
|
} |
|
VERBOSE(2,"Reduced persistent translation option cache in " |
|
<< reduceCacheTime << " seconds." << std::endl); |
|
} |
|
|
|
/**
 * Return the cache collection held by m_cache, creating it on first use.
 *
 * @return reference to the existing or newly created CacheColl.
 */
CacheColl &
PhraseDictionary::
GetCache() const
{
  CacheColl *cache = m_cache.get();
  if (!cache) {
    // Nothing stored yet: allocate a fresh collection and store it in m_cache.
    cache = new CacheColl;
    m_cache.reset(cache);
  }
  assert(cache);
  return *cache;
}
|
|
|
bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const |
|
{ |
|
const Phrase &sourcePhrase = inputPath.GetPhrase(); |
|
|
|
assert(m_container); |
|
const DecodeGraph &decodeGraph = GetDecodeGraph(); |
|
size_t backoff = decodeGraph.GetBackoff(); |
|
|
|
if (backoff == 0) { |
|
|
|
return true; |
|
} |
|
|
|
if (sourcePhrase.GetSize() > backoff) { |
|
|
|
return false; |
|
} |
|
|
|
|
|
InputPath::TargetPhrases::const_iterator iter; |
|
for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) { |
|
const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second; |
|
TargetPhraseCollection::shared_ptr tpCollPrev = temp.first; |
|
|
|
if (tpCollPrev && tpCollPrev->GetSize()) { |
|
|
|
return false; |
|
} |
|
} |
|
|
|
return true; |
|
} |
|
|
|
} |
|
|
|
|