File size: 6,437 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#pragma once

#include <string>
#include <boost/thread/tss.hpp>

#include "vw/Classifier.h"
#include "moses/TypeDef.h"
#include "moses/TranslationTask.h"
#include "moses/Util.h"
#include "moses/FF/StatelessFeatureFunction.h"

namespace Moses
{

// Kind of data a classifier feature depends on. VWFeatureBase::UpdateRegister
// uses this value to pick the per-classifier registry a feature is added to.
enum VWFeatureType {
  vwft_source        = 0, // registered among the source features
  vwft_target        = 1, // registered among the target features
  vwft_targetContext = 2  // registered among the target-context features
};

class VWFeatureBase : public StatelessFeatureFunction
{
public:
  VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
    : StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
    // defaults
    m_sourceFactors.push_back(0);
    m_targetFactors.push_back(0);
  }

  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  // Official hooks should do nothing. This is a hack to be able to define
  // classifier features in the moses.ini configuration file.
  void EvaluateInIsolation(const Phrase &source
                           , const TargetPhrase &targetPhrase
                           , ScoreComponentCollection &scoreBreakdown
                           , ScoreComponentCollection &estimatedFutureScore) const {}
  void EvaluateWithSourceContext(const InputType &input
                                 , const InputPath &inputPath
                                 , const TargetPhrase &targetPhrase
                                 , const StackVec *stackVec
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection *estimatedFutureScore = NULL) const {}
  void EvaluateTranslationOptionListWithSourceContext(const InputType &input
      , const TranslationOptionList &translationOptionList) const {}
  void EvaluateWhenApplied(const Hypothesis& hypo,
                           ScoreComponentCollection* accumulator) const {}
  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                           ScoreComponentCollection* accumulator) const {}


  // Common parameters for classifier features, both source and target features
  virtual void SetParameter(const std::string& key, const std::string& value) {
    if (key == "used-by") {
      ParseUsedBy(value);
    } else if (key == "source-factors") {
      Tokenize<FactorType>(m_sourceFactors, value, ",");
    } else if (key == "target-factors") {
      Tokenize<FactorType>(m_targetFactors, value, ",");
    } else {
      StatelessFeatureFunction::SetParameter(key, value);
    }
  }

  // Return all classifier features, regardless of type
  static const std::vector<VWFeatureBase*>& GetFeatures(std::string name = "VW0") {
    UTIL_THROW_IF2(s_features.count(name) == 0, "No features registered for parent classifier: " + name);
    return s_features[name];
  }

  // Return only source-dependent classifier features
  static const std::vector<VWFeatureBase*>& GetSourceFeatures(std::string name = "VW0") {
    UTIL_THROW_IF2(s_sourceFeatures.count(name) == 0, "No source features registered for parent classifier: " + name);
    return s_sourceFeatures[name];
  }

  // Return only target-context classifier features
  static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
    // don't throw an exception when there are no target-context features, this feature type is not mandatory
    return s_targetContextFeatures[name];
  }

  // Return only target-dependent classifier features
  static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
    UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
    return s_targetFeatures[name];
  }

  // Required length context (maximum context size of defined target-context features)
  static size_t GetMaximumContextSize(std::string name = "VW0") {
    return s_targetContextLength[name]; // 0 by default
  }

  // Overload to process source-dependent data, create features once for every
  // source sentence word range.
  virtual void operator()(const InputType &input
                          , const Range &sourceRange
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

  // Overload to process target-dependent features, create features once for
  // every target phrase. One source word range will have at least one target
  // phrase, but may have more.
  virtual void operator()(const InputType &input
                          , const TargetPhrase &targetPhrase
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

  // Overload to process target-context dependent features, these features are
  // evaluated during decoding. For efficiency, features are not fed directly into
  // the classifier object but instead output in the vector "features" and managed
  // separately in VW.h.
  virtual void operator()(const InputType &input
                          , const Phrase &contextPhrase
                          , const AlignmentInfo &alignmentInfo
                          , Discriminative::Classifier &classifier
                          , Discriminative::FeatureVector &outFeatures) const = 0;

protected:
  std::vector<FactorType> m_sourceFactors, m_targetFactors;

  void UpdateRegister() {
    for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
        it != m_usedBy.end(); it++) {
      s_features[*it].push_back(this);

      if(m_featureType == vwft_source) {
        s_sourceFeatures[*it].push_back(this);
      } else if (m_featureType == vwft_targetContext) {
        s_targetContextFeatures[*it].push_back(this);
        UpdateContextSize(*it);
      } else {
        s_targetFeatures[*it].push_back(this);
      }
    }
  }

private:
  void ParseUsedBy(const std::string &usedBy) {
    m_usedBy.clear();
    Tokenize(m_usedBy, usedBy, ",");
  }

  void UpdateContextSize(const std::string &usedBy);

  std::vector<std::string> m_usedBy;
  VWFeatureType m_featureType;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
  static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;

  static std::map<std::string, size_t> s_targetContextLength;
};

}