|
|
|
|
|
|
|
#pragma once |
|
// Compile-time switch, off (0) by default. A non-zero value enables the
// `#if PROVIDES_RANKED_SAMPLING` section of class Mmsapt, which declares
// set_bias_for_ranking(). The replacement list must be a plain integer
// expression because the macro is evaluated by the preprocessor in `#if`.
#define PROVIDES_RANKED_SAMPLING 0
|
|
|
#include <boost/thread.hpp> |
|
#include <boost/scoped_ptr.hpp> |
|
#include <boost/intrusive_ptr.hpp> |
|
|
|
#include "moses/TypeDef.h" |
|
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h" |
|
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h" |
|
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h" |
|
#include "moses/TranslationModel/UG/generic/threading/ug_thread_pool.h" |
|
|
|
#include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h" |
|
#include "moses/TranslationModel/UG/mm/ug_mm_tsa.h" |
|
#include "moses/TranslationModel/UG/mm/tpt_tokenindex.h" |
|
#include "moses/TranslationModel/UG/mm/ug_corpus_token.h" |
|
#include "moses/TranslationModel/UG/mm/ug_typedefs.h" |
|
#include "moses/TranslationModel/UG/mm/tpt_pickler.h" |
|
#include "moses/TranslationModel/UG/mm/ug_bitext.h" |
|
#include "moses/TranslationModel/UG/mm/ug_bitext_sampler.h" |
|
#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h" |
|
|
|
#include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h" |
|
|
|
#ifndef NO_MOSES |
|
#include "moses/FF/LexicalReordering/LexicalReordering.h" |
|
#endif |
|
|
|
#include "moses/InputFileStream.h" |
|
#include "moses/FactorTypeSet.h" |
|
#include "moses/TargetPhrase.h" |
|
#include <boost/dynamic_bitset.hpp> |
|
#include "moses/TargetPhraseCollection.h" |
|
#include "util/usage.hh" |
|
#include <map> |
|
|
|
#include "moses/TranslationModel/PhraseDictionary.h" |
|
#include "sapt_phrase_scorers.h" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace Moses |
|
{ |
|
class Mmsapt |
|
#ifndef NO_MOSES |
|
: public PhraseDictionary |
|
#endif |
|
{ |
|
class TPCOllCache; |
|
friend class Alignment; |
|
std::map<std::string,std::string> param; |
|
std::string m_name; |
|
#ifndef NO_MOSES |
|
|
|
friend class PhraseDictionaryGroup; |
|
#endif |
|
public: |
|
typedef sapt::L2R_Token<sapt::SimpleWordId> Token; |
|
typedef sapt::mmBitext<Token> mmbitext; |
|
typedef sapt::imBitext<Token> imbitext; |
|
typedef sapt::Bitext<Token> bitext; |
|
typedef sapt::TSA<Token> tsa; |
|
typedef sapt::PhraseScorer<Token> pscorer; |
|
private: |
|
|
|
SPTR<mmbitext> btfix; |
|
SPTR<imbitext> btdyn; |
|
std::string m_bname, m_extra_data, m_bias_file,m_bias_server; |
|
std::string L1; |
|
std::string L2; |
|
float m_lbop_conf; |
|
float m_lex_alpha; |
|
|
|
|
|
size_t m_default_sample_size; |
|
size_t m_min_sample_size; |
|
size_t m_workers; |
|
std::vector<std::string> m_feature_set_names; |
|
std::string m_bias_logfile; |
|
boost::scoped_ptr<std::ofstream> m_bias_logger; |
|
std::ostream* m_bias_log; |
|
int m_bias_loglevel; |
|
#ifndef NO_MOSES |
|
LexicalReordering* m_lr_func; |
|
#endif |
|
std::string m_lr_func_name; |
|
sapt::sampling_method m_sampling_method; |
|
boost::scoped_ptr<ug::ThreadPool> m_thread_pool; |
|
public: |
|
void* const bias_key; |
|
void* const cache_key; |
|
void* const context_key; |
|
private: |
|
boost::shared_ptr<sapt::SamplingBias> m_bias; |
|
boost::shared_ptr<TPCollCache> m_cache; |
|
size_t m_cache_size; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::string> m_feature_names; |
|
std::vector<bool> m_is_logval; |
|
std::vector<bool> m_is_integer; |
|
|
|
std::vector<SPTR<pscorer > > m_active_ff_fix; |
|
std::vector<SPTR<pscorer > > m_active_ff_dyn; |
|
std::vector<SPTR<pscorer > > m_active_ff_common; |
|
|
|
|
|
bool m_track_coord; |
|
|
|
|
|
std::vector<std::vector<SPTR<std::vector<float> > > > m_sid_coord_list; |
|
std::vector<size_t> m_coord_spaces; |
|
|
|
void |
|
parse_factor_spec(std::vector<FactorType>& flist, std::string const key); |
|
|
|
void |
|
register_ff(SPTR<pscorer> const& ff, std::vector<SPTR<pscorer> > & registry); |
|
|
|
template<typename fftype> |
|
void |
|
check_ff(std::string const ffname,std::vector<SPTR<pscorer> >* registry = NULL); |
|
|
|
|
|
template<typename fftype> |
|
void |
|
check_ff(std::string const ffname, float const xtra, |
|
std::vector<SPTR<pscorer> >* registry = NULL); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void init(std::string const& line); |
|
mutable boost::shared_mutex m_lock; |
|
|
|
|
|
bool withPbwd; |
|
bool poolCounts; |
|
std::vector<FactorType> m_ifactor, m_ofactor; |
|
|
|
void setup_local_feature_functions(); |
|
void setup_bias(ttasksptr const& ttask); |
|
|
|
#if PROVIDES_RANKED_SAMPLING |
|
void |
|
set_bias_for_ranking(ttasksptr const& ttask, SPTR<sapt::Bitext<Token> const> bt); |
|
#endif |
|
private: |
|
|
|
void read_config_file(std::string fname, std::map<std::string,std::string>& param); |
|
|
|
|
|
std::vector<float> feature_weights; |
|
|
|
std::vector<std::vector<tpt::id_type> > wlex21; |
|
|
|
typedef sapt::mm2dTable<tpt::id_type,tpt::id_type,uint32_t,uint32_t> mm2dtable_t; |
|
mm2dtable_t COOCraw; |
|
|
|
TargetPhrase* |
|
mkTPhrase(ttasksptr const& ttask, |
|
Phrase const& src, |
|
sapt::PhrasePair<Token>* fix, |
|
sapt::PhrasePair<Token>* dyn, |
|
SPTR<sapt::Bitext<Token> > const& dynbt) const; |
|
|
|
void |
|
process_pstats |
|
(Phrase const& src, |
|
uint64_t const pid1, |
|
sapt::pstats const& stats, |
|
sapt::Bitext<Token> const & bt, |
|
TargetPhraseCollection::shared_ptr tpcoll |
|
) const; |
|
|
|
bool |
|
pool_pstats |
|
(Phrase const& src, |
|
uint64_t const pid1a, sapt::pstats * statsa, sapt::Bitext<Token> const & bta, |
|
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
|
TargetPhraseCollection::shared_ptr tpcoll) const; |
|
|
|
bool |
|
combine_pstats |
|
(Phrase const& src, |
|
uint64_t const pid1a, sapt::pstats* statsa, sapt::Bitext<Token> const & bta, |
|
uint64_t const pid1b, sapt::pstats const* statsb, sapt::Bitext<Token> const & btb, |
|
TargetPhraseCollection::shared_ptr tpcoll) const; |
|
|
|
void load_extra_data(std::string bname, bool locking); |
|
void load_bias(std::string bname); |
|
|
|
public: |
|
|
|
Mmsapt(std::string const& line); |
|
|
|
void Load(AllOptions::ptr const& opts); |
|
void Load(AllOptions::ptr const& opts, bool with_checks); |
|
size_t SetTableLimit(size_t limit); |
|
std::string const& GetName() const; |
|
|
|
#ifndef NO_MOSES |
|
TargetPhraseCollection::shared_ptr |
|
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const; |
|
|
|
|
|
|
|
|
|
void |
|
GetTargetPhraseCollectionBatch |
|
(ttasksptr const& ttask, InputPathList const& inputPathQueue) const; |
|
|
|
|
|
ChartRuleLookupManager* |
|
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &); |
|
|
|
ChartRuleLookupManager* |
|
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &, |
|
std::size_t); |
|
#endif |
|
|
|
void add(std::string const& s1, std::string const& s2, std::string const& a); |
|
|
|
|
|
void setWeights(std::vector<float> const& w); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool ProvidesPrefixCheck() const; |
|
|
|
bool PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const; |
|
|
|
bool isLogVal(int i) const; |
|
bool isInteger(int i) const; |
|
|
|
|
|
void InitializeForInput(ttasksptr const& ttask); |
|
|
|
void CleanUpAfterSentenceProcessing(ttasksptr const& ttask); |
|
|
|
|
|
SPTR<std::vector<int> > |
|
align(std::string const& src, std::string const& trg) const; |
|
|
|
std::vector<std::string> const& |
|
GetFeatureNames() const; |
|
|
|
SPTR<sapt::DocumentBias> |
|
setupDocumentBias(std::map<std::string,float> const& bias) const; |
|
|
|
std::vector<float> DefaultWeights() const; |
|
}; |
|
} |
|
|
|
|