/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2012- University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
/**
 * This contains extra features that can be added to the scorer. To add a new feature:
 * 1. Implement a subclass of ScoreFeature
 * 2. Update ScoreFeatureManager::configure() to configure your feature, and usage() to
 *    display usage info.
 * 3. Write unit tests (see ScoreFeatureTest.cpp) and regression tests
 **/
namespace MosesTraining
{
/**
 * Functor that optionally maps a value into scaled log space.
 *
 * If constructed with useLog == true, operator() returns
 * negativeLog * log(a); otherwise it returns its argument unchanged.
 **/
struct MaybeLog {
  /**
   * @param useLog       whether operator() should take the logarithm
   * @param negativeLog  factor the logarithm is multiplied by
   */
  MaybeLog(bool useLog, float negativeLog):
    m_useLog(useLog), m_negativeLog(negativeLog) {}

  /** Apply the configured transformation to a. */
  inline float operator() (float a) const {
    return m_useLog ? m_negativeLog*log(a) : a;
  }

  // On/off flag: stored as bool to match the constructor argument
  // (it was previously declared float, although it only ever held 0 or 1).
  bool m_useLog;
  // Multiplier applied to log(a) when m_useLog is set.
  float m_negativeLog;
};
class ScoreFeatureArgumentException : public util::Exception | |
{ | |
public: | |
ScoreFeatureArgumentException() throw() { | |
*this << "Unable to configure features: "; | |
} | |
~ScoreFeatureArgumentException() throw() {} | |
}; | |
/** Passed to each feature to be used to calculate its values */ | |
struct ScoreFeatureContext { | |
ScoreFeatureContext( | |
const ExtractionPhrasePair &thePhrasePair, | |
const MaybeLog& theMaybeLog | |
) : | |
phrasePair(thePhrasePair), | |
maybeLog(theMaybeLog) { | |
} | |
const ExtractionPhrasePair &phrasePair; | |
MaybeLog maybeLog; | |
}; | |
/** | |
* Abstract base class for extra features that can be added to the phrase table | |
* during scoring. | |
**/ | |
class ScoreFeature | |
{ | |
public: | |
/** Some features might need to store properties in ExtractionPhrasePair, | |
* e.g. to pass along external information loaded by a feature | |
* which may distinguish several phrase occurrences based on sentence ID */ | |
virtual void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, | |
float count, | |
int sentenceId) const {}; | |
/** Add the values for this score feature. */ | |
virtual void add(const ScoreFeatureContext& context, | |
std::vector<float>& denseValues, | |
std::map<std::string,float>& sparseValues) const = 0; | |
virtual ~ScoreFeature() {} | |
}; | |
/** Shared-ownership handle to a ScoreFeature. */
typedef boost::shared_ptr<ScoreFeature> ScoreFeaturePtr;
class ScoreFeatureManager | |
{ | |
public: | |
ScoreFeatureManager(): | |
m_includeSentenceId(false) {} | |
/** To be appended to the score usage message */ | |
const std::string& usage() const; | |
/** Pass the unused command-line arguments to configure the extra features */ | |
void configure(const std::vector<std::string> args); | |
/** Some features might need to store properties in ExtractionPhrasePair, | |
* e.g. to pass along external information loaded by a feature | |
* which may distinguish several phrase occurrences based on sentence ID */ | |
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair, | |
float count, | |
int sentenceId) const; | |
/** Add all the features */ | |
void addFeatures(const ScoreFeatureContext& context, | |
std::vector<float>& denseValues, | |
std::map<std::string,float>& sparseValues) const; | |
const std::vector<ScoreFeaturePtr>& getFeatures() const { | |
return m_features; | |
} | |
/** Do we need to include sentence ids in phrase pairs? */ | |
bool includeSentenceId() const { | |
return m_includeSentenceId; | |
} | |
private: | |
std::vector<ScoreFeaturePtr> m_features; | |
bool m_includeSentenceId; | |
}; | |
}