File size: 5,513 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
#ifndef moses_FF_LexicalReordering_SparseReordering_h
#define moses_FF_LexicalReordering_SparseReordering_h
/**
* Sparse reordering features for phrase-based MT, following Cherry (NAACL, 2013)
**/
#include <functional>
#include <map>
#include <string>
#include <vector>
#include <boost/unordered_set.hpp>
#include "util/murmur_hash.hh"
#include "util/pool.hh"
#include "util/string_piece.hh"
#include "moses/FeatureVector.h"
#include "moses/ScoreComponentCollection.h"
#include "LRState.h"
/**
Configuration of sparse reordering:
The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
sparse-words-(source|target)-<id>=<filename> -- Features which fire for the words in the list
sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
of cluster file TBD
sparse-phrase -- Add features which depend on the current phrase (backward)
sparse-stack -- Add features which depend on the previous phrase, or
top of stack. (forward)
sparse-between -- Add features which depend on words between previous phrase
(or top of stack) and current phrase.
**/
namespace Moses
{
/**
* Used to store pre-calculated feature names.
**/
struct SparseReorderingFeatureKey {
size_t id;
enum Type {Stack, Phrase, Between} type;
const Factor* word;
bool isCluster;
enum Position {First, Last} position;
enum Side {Source, Target} side;
LRState::ReorderingType reoType;
SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
Position position_, Side side_, LRState::ReorderingType reoType_)
: id(id_), type(type_), word(word_), isCluster(isCluster_),
position(position_), side(side_), reoType(reoType_) {
}
const std::string& Name(const std::string& wordListId) ;
};
struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
std::size_t operator()(const SparseReorderingFeatureKey& key) const {
//TODO: can we just hash the memory?
//not sure, there could be random padding
std::size_t seed = 0;
seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed);
seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed);
seed = util::MurmurHashNative(&key.isCluster, sizeof(key.isCluster), seed);
seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed);
seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed);
seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed);
return seed;
}
};
struct EqualsSparseReorderingFeatureKey :
public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
//TODO: Can we just compare the memory?
return left.id == right.id && left.type == right.type && left.word == right.word &&
left.position == right.position && left.side == right.side &&
left.reoType == right.reoType;
}
};
class SparseReordering
{
public:
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
//If direction is backward the options will be different, for forward they will be the same
void CopyScores(const TranslationOption& currentOpt,
const TranslationOption* previousOpt,
const InputType& input,
LRModel::ReorderingType reoType,
LRModel::Direction direction,
ScoreComponentCollection* scores) const ;
private:
const LexicalReordering* m_producer;
typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
std::vector<WordList> m_sourceWordLists;
std::vector<WordList> m_targetWordLists;
typedef std::pair<std::string, boost::unordered_map<const Factor*, const Factor*> > ClusterMap; //id and map
std::vector<ClusterMap> m_sourceClusterMaps;
std::vector<ClusterMap> m_targetClusterMaps;
bool m_usePhrase;
bool m_useBetween;
bool m_useStack;
typedef boost::unordered_map<SparseReorderingFeatureKey, FName, HashSparseReorderingFeatureKey, EqualsSparseReorderingFeatureKey> FeatureMap;
FeatureMap m_featureMap;
typedef boost::unordered_map<std::string, float> WeightMap;
WeightMap m_weightMap;
bool m_useWeightMap;
std::vector<FName> m_featureMap2;
void ReadWordList(const std::string& filename, const std::string& id,
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
void ReadWeightMap(const std::string& filename);
void AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
const Word& word, SparseReorderingFeatureKey::Position position,
LRModel::ReorderingType reoType,
ScoreComponentCollection* scores) const;
};
} //namespace
#endif
|