|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <boost/algorithm/string/predicate.hpp> |
|
#include "ScoreFeature.h" |
|
#include "DomainFeature.h" |
|
#include "InternalStructFeature.h" |
|
|
|
using namespace std; |
|
using namespace boost::algorithm; |
|
|
|
namespace MosesTraining |
|
{ |
|
|
|
|
|
/**
 * Returns the command-line usage text for the domain score-feature options.
 *
 * The text is held in a function-local static object, so the returned
 * reference remains valid after the call returns.
 *
 * NOTE(review): the text advertises a "Bin" variant and a trailing "[bins]"
 * argument, neither of which configure() in this file accepts, and it does
 * not mention --TreeFeatureSparse, --TreeFeatureDense or --IgnoreSentenceId.
 * Confirm whether the text or the parser should be brought in line.
 */
const string& ScoreFeatureManager::usage() const
{
  // Own the string directly instead of binding a static reference to a
  // lifetime-extended temporary; the distinct name also avoids shadowing
  // the member function inside its own body.
  static const string usageText = "[--[Sparse]Domain[Indicator|Ratio|Subset|Bin] domain-file [bins]]" ;
  return usageText;
}
|
|
|
void ScoreFeatureManager::configure(const std::vector<std::string> args) |
|
{ |
|
bool domainAdded = false; |
|
bool sparseDomainAdded = false; |
|
|
|
for (size_t i = 0; i < args.size(); ++i) { |
|
if (args[i] == "--IgnoreSentenceId") { |
|
m_includeSentenceId = true; |
|
} else if (starts_with(args[i], "--Domain")) { |
|
string type = args[i].substr(8); |
|
++i; |
|
UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file"); |
|
string domainFile = args[i]; |
|
UTIL_THROW_IF(domainAdded, ScoreFeatureArgumentException, |
|
"Only allowed one domain feature"); |
|
if (type == "Subset") { |
|
m_features.push_back(ScoreFeaturePtr(new SubsetDomainFeature(domainFile))); |
|
} else if (type == "Ratio") { |
|
m_features.push_back(ScoreFeaturePtr(new RatioDomainFeature(domainFile))); |
|
} else if (type == "Indicator") { |
|
m_features.push_back(ScoreFeaturePtr(new IndicatorDomainFeature(domainFile))); |
|
} else { |
|
UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type); |
|
} |
|
domainAdded = true; |
|
m_includeSentenceId = true; |
|
} else if (starts_with(args[i], "--SparseDomain")) { |
|
string type = args[i].substr(14); |
|
++i; |
|
UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file"); |
|
string domainFile = args[i]; |
|
UTIL_THROW_IF(sparseDomainAdded, ScoreFeatureArgumentException, |
|
"Only allowed one sparse domain feature"); |
|
if (type == "Subset") { |
|
m_features.push_back(ScoreFeaturePtr(new SparseSubsetDomainFeature(domainFile))); |
|
} else if (type == "Ratio") { |
|
m_features.push_back(ScoreFeaturePtr(new SparseRatioDomainFeature(domainFile))); |
|
} else if (type == "Indicator") { |
|
m_features.push_back(ScoreFeaturePtr(new SparseIndicatorDomainFeature(domainFile))); |
|
} else { |
|
UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type); |
|
} |
|
sparseDomainAdded = true; |
|
m_includeSentenceId = true; |
|
} else if(args[i] == "--TreeFeatureSparse") { |
|
|
|
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse())); |
|
} else if(args[i] == "--TreeFeatureDense") { |
|
|
|
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense())); |
|
} else { |
|
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]); |
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
/**
 * Forwards a phrase-pair observation to every configured feature.
 *
 * Each entry of m_features, in the order it was added, receives the same
 * three arguments via its own addPropertiesToPhrasePair().
 *
 * @param phrasePair  phrase pair handed to each feature (non-const reference).
 * @param count       count value passed through unchanged.
 * @param sentenceId  sentence identifier passed through unchanged.
 */
void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
    float count,
    int sentenceId) const
{
  size_t featureIdx = 0;
  while (featureIdx < m_features.size()) {
    m_features[featureIdx]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);
    ++featureIdx;
  }
}
|
|
|
void ScoreFeatureManager::addFeatures(const ScoreFeatureContext& context, |
|
std::vector<float>& denseValues, |
|
std::map<std::string,float>& sparseValues) const |
|
{ |
|
for (size_t i = 0; i < m_features.size(); ++i) { |
|
m_features[i]->add(context, denseValues, sparseValues); |
|
} |
|
} |
|
} |
|
|
|
|