// Listing metadata from the file viewer: file size 6,509 bytes, identifier 158b61b (line-number gutter removed).
// -*- c++ -*-
#ifndef moses_FeatureFunction_h
#define moses_FeatureFunction_h
#include <vector>
#include <set>
#include <string>
#include "moses/FeatureVector.h"
#include "moses/TypeDef.h"
#include "moses/parameters/AllOptions.h"
#include <boost/shared_ptr.hpp>
namespace Moses
{
// Forward declarations. Where this header uses one of these types it is
// only as a reference or pointer parameter, so a full definition is not
// required here (AllOptions is the exception, see the note on it).
// NOTE(review): TranslationOption, Hypothesis, ChartHypothesis, Bitmap,
// Range and DistortionScoreProducer are not referenced anywhere in this
// header as shown; they may be kept for files that include it -- verify
// before removing any of them.
class AllOptions; // NOTE(review): AllOptions.h is already included above and AllOptions::ptr is used below, so this declaration looks redundant (harmless)
class Phrase;
class TargetPhrase;
class TranslationOptionList;
class TranslationOption;
class Hypothesis;
class ChartHypothesis;
class InputType;
class ScoreComponentCollection;
class Bitmap;
class Range;
class FactorMask;
class InputPath;
class StackVec;
class DistortionScoreProducer;
class TranslationTask;
/** Base class for all feature functions.
 *
 * Holds a static collection of feature-function instances (s_staticColl)
 * together with the per-instance bookkeeping shared by every subclass:
 * description, raw argument line, number of score components and which of
 * those components are tuneable.
 *
 * Subclasses must implement the pure virtual members IsStateless(),
 * IsUseable(), EvaluateInIsolation(), EvaluateWithSourceContext() and
 * EvaluateTranslationOptionListWithSourceContext().
 */
class FeatureFunction
{
protected:
  /** all the score producers in this run */
  static std::vector<FeatureFunction*> s_staticColl;

  std::string m_description, m_argLine;
  // NOTE(review): presumably the tokenised form of m_argLine filled in by
  // ParseLine() -- confirm in the .cpp
  std::vector<std::vector<std::string> > m_args;
  bool m_tuneable;
  bool m_requireSortingAfterSourceContext;
  size_t m_verbosity;
  size_t m_numScoreComponents;
  size_t m_index; // index into vector covering ALL feature function values
  // Per-component tuneable flags; only consulted by IsTuneableComponent()
  // when not every component is tuneable.
  std::vector<bool> m_tuneableComponents;
  size_t m_numTuneableComponents;
  // Options handed to Load(); returned by options().
  AllOptions::ptr m_options;

  //In case there's multiple producers with the same description
  static std::multiset<std::string> description_counts;

public:
  // NOTE(review): defined out of line; presumably adds ff to s_staticColl.
  static void Register(FeatureFunction* ff);

private:
  // void Initialize(const std::string &line);
  void ParseLine(const std::string &line);

public:
  //! read-only access to the static collection of feature functions
  static const std::vector<FeatureFunction*>& GetFeatureFunctions() {
    return s_staticColl;
  }

  // NOTE(review): returns a reference, so a failed lookup cannot be reported
  // through the return value -- check the definition for how a missing name
  // is handled.
  static FeatureFunction &FindFeatureFunction(const std::string& name);
  static void Destroy();

  // NOTE(review): 'line' looks like the feature's configuration line and
  // 'registerNow' presumably decides whether Register() is invoked by the
  // constructor -- both are defined out of line, confirm there.
  FeatureFunction(const std::string &line, bool registerNow);
  FeatureFunction(size_t numScoreComponents, const std::string &line, bool registerNow = true);

  virtual bool IsStateless() const = 0;
  virtual ~FeatureFunction();

  //! override to load model files
  //! The base implementation only stores opts in m_options, which is what
  //! options() returns.
  virtual void Load(AllOptions::ptr const& opts) {
    m_options = opts;
  }

  //! the options pointer last stored by Load()
  AllOptions::ptr const&
  options() const {
    return m_options;
  }

  //! empties the static description_counts multiset
  static void ResetDescriptionCounts() {
    description_counts.clear();
  }

  //! returns the number of scores that a subclass produces.
  //! For example, a language model conventionally produces 1, a translation table some arbitrary number, etc
  size_t GetNumScoreComponents() const {
    return m_numScoreComponents;
  }

  //! returns a string description of this producer
  const std::string& GetScoreProducerDescription() const {
    return m_description;
  }

  //! builds an FName from this producer's description and the given name
  FName GetFeatureName(const std::string& name) const {
    return FName(GetScoreProducerDescription(), name);
  }

  //! if false, then this feature is not displayed in the n-best list.
  // use with care
  virtual bool IsTuneable() const {
    return m_tuneable;
  }

  //! true iff at least one score component is tuneable
  virtual bool HasTuneableComponents() const {
    // explicit comparison instead of an implicit size_t -> bool narrowing
    return m_numTuneableComponents != 0;
  }

  //! whether score component i is tuneable.
  //! When every component is tuneable the answer is true for any i;
  //! otherwise m_tuneableComponents is indexed WITHOUT a bounds check, so
  //! the caller must pass i < m_tuneableComponents.size().
  virtual bool IsTuneableComponent(size_t i) const {
    if (m_numTuneableComponents == m_numScoreComponents) {
      return true;
    }
    return m_tuneableComponents[i];
  }

  virtual bool RequireSortingAfterSourceContext() const {
    return m_requireSortingAfterSourceContext;
  }

  virtual std::vector<float> DefaultWeights() const;

  size_t GetIndex() const;
  size_t SetIndex(size_t const idx);

protected:
  // Per-sentence clean-up hook taking the input directly; the default
  // implementation does nothing.
  // NOTE(review): presumably invoked by the public ttasksptr overload
  // below -- confirm in the .cpp
  virtual void
  CleanUpAfterSentenceProcessing(InputType const& source) { }

public:
  //! Called before search and collecting of translation options
  //! The default implementation does nothing.
  virtual void
  InitializeForInput(ttasksptr const& ttask) { }

  // clean up temporary memory, called after processing each sentence
  virtual void
  CleanUpAfterSentenceProcessing(ttasksptr const& ttask);

  //! the raw argument line stored in m_argLine
  const std::string &
  GetArgLine() const {
    return m_argLine;
  }

  // given a target phrase containing only factors specified in mask
  // return true if the feature function can be evaluated
  virtual bool IsUseable(const FactorMask &mask) const = 0;

  // used by stateless ff and stateful ff. Calculate initial score
  // estimate during loading of phrase table
  //
  // source phrase is the substring that the phrase table uses to look
  // up the target phrase,
  //
  // may have more factors than actually need, but not guaranteed.
  // For SCFG decoding, the source contains non-terminals, NOT the raw
  // source from the input sentence
  virtual void
  EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
                      ScoreComponentCollection& scoreBreakdown,
                      ScoreComponentCollection& estimatedScores) const = 0;

  // for context-dependent processing
  static void SetupAll(TranslationTask const& task);
  // Per-feature hook for context-dependent processing; the default
  // implementation does nothing.
  virtual void Setup(TranslationTask const& task) const { }

  // This method is called once all the translation options are retrieved from the phrase table, and
  // just before search.
  // 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
  // 'stackVec' is a vector of chart cells that the RHS non-terms cover.
  // It is guaranteed to be in the same order as the non-terms in the source phrase.
  // For pb models, stackvec is NULL.
  // No FF should set estimatedScores in both overloads!
  virtual void EvaluateWithSourceContext(const InputType &input
                                         , const InputPath &inputPath
                                         , const TargetPhrase &targetPhrase
                                         , const StackVec *stackVec
                                         , ScoreComponentCollection &scoreBreakdown
                                         , ScoreComponentCollection *estimatedScores = NULL) const = 0;

  // This method is called once all the translation options are retrieved from the phrase table, and
  // just before search.
  // Unlike EvaluateWithSourceContext() it receives the input together with
  // a whole TranslationOptionList instead of a single target phrase; it has
  // no inputPath, stackVec or score-collection parameters.
  virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const = 0;

  virtual void SetParameter(const std::string& key, const std::string& value);
  virtual void ReadParameters();
  virtual void SetTuneableComponents(const std::string& value);
};
}
#endif
// end of include guard: moses_FeatureFunction_h