File size: 4,478 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
#include <boost/functional/hash.hpp>
#include <vector>
#include <algorithm>
#include <iterator>
#include <boost/foreach.hpp>
#include "CoveredReferenceFeature.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
#include "moses/ChartHypothesis.h"
#include "moses/ChartManager.h"
#include "moses/StaticData.h"
#include "moses/InputFileStream.h"
#include "moses/Util.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
size_t CoveredReferenceState::hash() const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
bool CoveredReferenceState::operator==(const FFState& other) const
{
UTIL_THROW2("TODO:Haven't figure this out yet");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedScores) const
{
long id = input.GetTranslationId();
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
multiset<string> covered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
refIt->second.begin(), refIt->second.end(),
inserter(covered, covered.begin()));
vector<float> scores;
scores.push_back(covered.size());
scoreBreakdown.Assign(this, scores);
estimatedScores->Assign(this, scores);
}
void CoveredReferenceFeature::Load(AllOptions::ptr const& opts)
{
m_options = opts;
InputFileStream refFile(m_path);
std::string line;
const StaticData &staticData = StaticData::Instance();
long sentenceID = opts->output.start_translation_id;
while (getline(refFile, line)) {
vector<string> words = Tokenize(line, " ");
multiset<string> wordSet;
// TODO make Tokenize work with other containers than vector
copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
m_refs.insert(make_pair(sentenceID++, wordSet));
}
}
void CoveredReferenceFeature::SetParameter(const std::string& key, const std::string& value)
{
if (key == "path") {
m_path = value;
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
FFState* CoveredReferenceFeature::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
const CoveredReferenceState &prev = static_cast<const CoveredReferenceState&>(*prev_state);
CoveredReferenceState *ret = new CoveredReferenceState(prev);
const Manager &mgr = cur_hypo.GetManager();
const InputType &input = mgr.GetSource();
long id = input.GetTranslationId();
// which words from the reference remain uncovered
multiset<string> remaining;
boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
set_difference(refIt->second.begin(), refIt->second.end(),
ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
inserter(remaining, remaining.begin()));
// which of the remaining words are present in the current phrase
multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
multiset<string> newCovered;
set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
remaining.begin(), remaining.end(),
inserter(newCovered, newCovered.begin()));
vector<float> estimateScore =
cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
vector<float> scores;
scores.push_back(newCovered.size() - estimateScore[0]);
accumulator->PlusEquals(this, scores);
// update feature state
multiset<string>::const_iterator newCoveredIt;
for (newCoveredIt = newCovered.begin(); newCoveredIt != newCovered.end(); newCoveredIt++) {
ret->m_coveredRef.insert(*newCoveredIt);
}
return ret;
}
FFState* CoveredReferenceFeature::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
UTIL_THROW(util::Exception, "Not implemented");
}
}
|