File size: 1,851 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#pragma once

#include <string>
#include <cstdlib>

#include "ThreadLocalByFeatureStorage.h"
#include "VWFeatureSource.h"
#include "TabbedSentence.h"

namespace Moses
{

// Assuming a given column of TabbedSentence contains space separated source features
class VWFeatureSourceExternalFeatures : public VWFeatureSource
{
public:
  VWFeatureSourceExternalFeatures(const std::string &line)
    : VWFeatureSource(line), m_tls(this), m_column(0) {
    ReadParameters();

    // Call this last
    VWFeatureBase::UpdateRegister();
  }

  void operator()(const InputType &input
                  , const Range &sourceRange
                  , Discriminative::Classifier &classifier
                  , Discriminative::FeatureVector &outFeatures) const {
    const Features& features = *m_tls.GetStored();
    for (size_t i = 0; i < features.size(); i++) {
      outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
    }
  }

  virtual void SetParameter(const std::string& key, const std::string& value) {
    if(key == "column")
      m_column = Scan<size_t>(value);
    else
      VWFeatureSource::SetParameter(key, value);
  }

  virtual void InitializeForInput(ttasksptr const& ttask) {
    InputType const& source = *(ttask->GetSource().get());
    UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
                   "This feature function requires the TabbedSentence input type");

    const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
    const std::string &column = tabbedSentence.GetColumn(m_column);

    Features& features = *m_tls.GetStored();
    features.clear();

    Tokenize(features, column, " ");
  }

private:
  typedef std::vector<std::string> Features;
  typedef ThreadLocalByFeatureStorage<Features> TLSFeatures;

  TLSFeatures m_tls;
  size_t m_column;
};

}