|
#ifndef BLUESCOREFEATURE_H |
|
#define BLUESCOREFEATURE_H |
|
|
|
#include <utility> |
|
#include <string> |
|
#include <vector> |
|
|
|
#include <boost/unordered_map.hpp> |
|
|
|
#include "StatefulFeatureFunction.h" |
|
|
|
#include "moses/FF/FFState.h" |
|
#include "moses/Phrase.h" |
|
#include "moses/ChartHypothesis.h" |
|
|
|
namespace Moses |
|
{ |
|
|
|
class BleuScoreFeature; |
|
|
|
class BleuScoreState : public FFState |
|
{ |
|
public: |
|
friend class BleuScoreFeature; |
|
static size_t bleu_order; |
|
|
|
BleuScoreState(bool is_syntax); |
|
size_t hash() const; |
|
virtual bool operator==(const FFState& other) const; |
|
|
|
void print(std::ostream& out) const; |
|
|
|
private: |
|
Phrase m_words; |
|
size_t m_source_length; |
|
size_t m_target_length; |
|
bool m_is_syntax; |
|
|
|
float m_scaled_ref_length; |
|
|
|
std::vector< size_t > m_ngram_counts; |
|
std::vector< size_t > m_ngram_matches; |
|
|
|
void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches); |
|
}; |
|
|
|
|
|
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state); |
|
|
|
typedef boost::unordered_map< Phrase, size_t > NGrams; |
|
|
|
class RefValue : public std::pair<std::vector<size_t>,NGrams> |
|
{ |
|
public: |
|
RefValue& operator=( const RefValue& rhs ) { |
|
first = rhs.first; |
|
second = rhs.second; |
|
return *this; |
|
} |
|
}; |
|
|
|
|
|
class BleuScoreFeature : public StatefulFeatureFunction |
|
{ |
|
public: |
|
static const std::vector<BleuScoreFeature*>& GetColl() { |
|
return s_staticColl; |
|
} |
|
|
|
typedef boost::unordered_map<size_t, RefValue > RefCounts; |
|
typedef boost::unordered_map<size_t, NGrams> Matches; |
|
|
|
BleuScoreFeature(const std::string &line); |
|
|
|
void SetParameter(const std::string& key, const std::string& value); |
|
|
|
std::vector<float> DefaultWeights() const; |
|
|
|
void PrintHistory(std::ostream& out) const; |
|
void LoadReferences(const std::vector< std::vector< std::string > > &); |
|
void SetCurrSourceLength(size_t); |
|
void SetCurrNormSourceLength(size_t); |
|
void SetCurrShortestRefLength(size_t); |
|
void SetCurrAvgRefLength(size_t sent_id); |
|
void SetAvgInputLength (float l) { |
|
m_avg_input_length = l; |
|
} |
|
void SetCurrReferenceNgrams(size_t sent_id); |
|
size_t GetShortestRefIndex(size_t ref_id); |
|
size_t GetClosestRefLength(size_t ref_id, int hypoLength); |
|
void UpdateHistory(const std::vector< const Word* >&); |
|
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch); |
|
void PrintRefLength(const std::vector<size_t>& ref_ids); |
|
void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength, |
|
bool scaleByInverseLength, bool scaleByAvgInverseLength, |
|
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu); |
|
|
|
void GetNgramMatchCounts(Phrase&, |
|
const NGrams&, |
|
std::vector< size_t >&, |
|
std::vector< size_t >&, |
|
size_t skip = 0) const; |
|
void GetNgramMatchCounts_prefix(Phrase&, |
|
const NGrams&, |
|
std::vector< size_t >&, |
|
std::vector< size_t >&, |
|
size_t new_start_indices, |
|
size_t last_end_index) const; |
|
void GetNgramMatchCounts_overlap(Phrase& phrase, |
|
const NGrams& ref_ngram_counts, |
|
std::vector< size_t >& ret_counts, |
|
std::vector< size_t >& ret_matches, |
|
size_t overlap_index) const; |
|
void GetClippedNgramMatchesAndCounts(Phrase&, |
|
const NGrams&, |
|
std::vector< size_t >&, |
|
std::vector< size_t >&, |
|
size_t skip = 0) const; |
|
|
|
FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, |
|
const FFState* prev_state, |
|
ScoreComponentCollection* accumulator) const; |
|
FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, |
|
int featureID, |
|
ScoreComponentCollection* accumulator) const; |
|
|
|
bool Enabled() const { |
|
return m_enabled; |
|
} |
|
|
|
bool IsUseable(const FactorMask &mask) const; |
|
|
|
float CalculateBleu(BleuScoreState*) const; |
|
float CalculateBleu(Phrase translation) const; |
|
const FFState* EmptyHypothesisState(const InputType&) const; |
|
|
|
float GetSourceLengthHistory() { |
|
return m_source_length_history; |
|
} |
|
float GetTargetLengthHistory() { |
|
return m_target_length_history; |
|
} |
|
float GetAverageInputLength() { |
|
return m_avg_input_length; |
|
} |
|
|
|
void Load(AllOptions::ptr const& opts); |
|
|
|
private: |
|
static std::vector<BleuScoreFeature*> s_staticColl; |
|
|
|
bool m_enabled; |
|
bool m_sentence_bleu; |
|
bool m_simple_history_bleu; |
|
bool m_is_syntax; |
|
|
|
std::vector< float > m_count_history; |
|
std::vector< float > m_match_history; |
|
float m_source_length_history; |
|
float m_target_length_history; |
|
float m_ref_length_history; |
|
|
|
size_t m_cur_source_length; |
|
size_t m_cur_norm_source_length; |
|
RefCounts m_refs; |
|
NGrams m_cur_ref_ngrams; |
|
float m_cur_ref_length; |
|
|
|
|
|
bool m_scale_by_input_length; |
|
bool m_scale_by_avg_input_length; |
|
|
|
|
|
bool m_scale_by_inverse_length; |
|
bool m_scale_by_avg_inverse_length; |
|
|
|
float m_avg_input_length; |
|
|
|
float m_scale_by_x; |
|
|
|
|
|
float m_historySmoothing; |
|
|
|
enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 }; |
|
SmoothingScheme m_smoothing_scheme; |
|
}; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|