File size: 6,437 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
#pragma once
#include <cstddef>
#include <map>
#include <string>
#include <vector>
#include <boost/thread/tss.hpp>
#include "vw/Classifier.h"
#include "moses/TypeDef.h"
#include "moses/TranslationTask.h"
#include "moses/Util.h"
#include "moses/FF/StatelessFeatureFunction.h"
namespace Moses
{
// Kind of information a classifier feature is computed from. The value decides
// which per-classifier registry a feature is added to when it registers itself.
enum VWFeatureType {
  // Feature computed from the source side.
  vwft_source = 0,
  // Feature computed from the target phrase.
  vwft_target = 1,
  // Feature computed from the target-side context.
  vwft_targetContext = 2
};
class VWFeatureBase : public StatelessFeatureFunction
{
public:
VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
: StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
// defaults
m_sourceFactors.push_back(0);
m_targetFactors.push_back(0);
}
bool IsUseable(const FactorMask &mask) const {
return true;
}
// Official hooks should do nothing. This is a hack to be able to define
// classifier features in the moses.ini configuration file.
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {}
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {}
void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {}
void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const {}
// Common parameters for classifier features, both source and target features
virtual void SetParameter(const std::string& key, const std::string& value) {
if (key == "used-by") {
ParseUsedBy(value);
} else if (key == "source-factors") {
Tokenize<FactorType>(m_sourceFactors, value, ",");
} else if (key == "target-factors") {
Tokenize<FactorType>(m_targetFactors, value, ",");
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
// Return all classifier features, regardless of type
static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
return s_features[name];
}
// Return only source-dependent classifier features
static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
return s_sourceFeatures[name];
}
// Return only target-context classifier features
static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
// don't throw an exception when there are no target-context features, this feature type is not mandatory
return s_targetContextFeatures[name];
}
// Return only target-dependent classifier features
static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
return s_targetFeatures[name];
}
// Required length context (maximum context size of defined target-context features)
static size_t GetMaximumContextSize(std::string name = "VW0") {
return s_targetContextLength[name]; // 0 by default
}
// Overload to process source-dependent data, create features once for every
// source sentence word range.
virtual void operator()(const InputType &input
, const Range &sourceRange
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
// Overload to process target-dependent features, create features once for
// every target phrase. One source word range will have at least one target
// phrase, but may have more.
virtual void operator()(const InputType &input
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
// Overload to process target-context dependent features, these features are
// evaluated during decoding. For efficiency, features are not fed directly into
// the classifier object but instead output in the vector "features" and managed
// separately in VW.h.
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
protected:
std::vector<FactorType> m_sourceFactors, m_targetFactors;
void UpdateRegister() {
for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
it != m_usedBy.end(); it++) {
s_features[*it].push_back(this);
if(m_featureType == vwft_source) {
s_sourceFeatures[*it].push_back(this);
} else if (m_featureType == vwft_targetContext) {
s_targetContextFeatures[*it].push_back(this);
UpdateContextSize(*it);
} else {
s_targetFeatures[*it].push_back(this);
}
}
}
private:
void ParseUsedBy(const std::string &usedBy) {
m_usedBy.clear();
Tokenize(m_usedBy, usedBy, ",");
}
void UpdateContextSize(const std::string &usedBy);
std::vector<std::string> m_usedBy;
VWFeatureType m_featureType;
static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
static std::map<std::string, size_t> s_targetContextLength;
};
}
|