File size: 2,903 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
#include "M2Scorer.h"
#include <algorithm>
#include <fstream>
#include <stdexcept>
#include <sstream>
#include <cstdlib>
#include <boost/lexical_cast.hpp>
using namespace std;
namespace MosesTuning
{
/**
 * Builds an M2 scorer from a configuration string.
 *
 * The string is forwarded to the StatisticsBasedScorer base together with the
 * scorer name "M2Scorer". Four options are then read with getConfig and parsed
 * with Scan<T>; the second getConfig argument is presumably the value used
 * when the key is absent (confirm against the base class):
 *   - "beta"                -> beta_                (float, "0.5")
 *   - "max_unchanged_words" -> max_unchanged_words_ (int,   "2")
 *   - "truecase"            -> truecase_            (bool,  "false")
 *   - "verbose"             -> verbose_             (bool,  "false")
 *
 * @param config scorer configuration string handed to the base class.
 */
M2Scorer::M2Scorer(const string& config)
: StatisticsBasedScorer("M2Scorer", config),
beta_(Scan<float>(getConfig("beta", "0.5"))),
max_unchanged_words_(Scan<int>(getConfig("max_unchanged_words", "2"))),
truecase_(Scan<bool>(getConfig("truecase", "false"))),
verbose_(Scan<bool>(getConfig("verbose", "false"))),
// m2_ is constructed from the members initialised above.
// NOTE(review): members are initialised in declaration order, so this relies
// on max_unchanged_words_, beta_ and truecase_ being declared before m2_ in
// M2Scorer.h -- confirm.
m2_(max_unchanged_words_, beta_, truecase_)
{}
/**
 * Loads the reference annotations into m2_.
 *
 * Only the first entry of referenceFiles is passed to m2_.ReadM2; any further
 * entries are ignored. An empty list leaves m2_ untouched.
 *
 * @param referenceFiles paths of the reference files.
 */
void M2Scorer::setReferenceFiles(const vector<string>& referenceFiles)
{
  if (referenceFiles.empty()) {
    return;
  }
  m2_.ReadM2(referenceFiles.front());
}
void M2Scorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
string sentence = trimStr(this->preprocessSentence(text));
std::vector<ScoreStatsType> stats(4, 0);
m2_.SufStats(sentence, sid, stats);
entry.set(stats);
}
/**
 * Combines accumulated statistics into a single F-beta style score.
 *
 * With beta taken from beta_:
 *   p = comps[0] / comps[1]   (1 when comps[1] is zero)
 *   r = comps[0] / comps[2]   (1 when comps[2] is zero)
 *   f = (1 + beta^2) * p * r / (beta^2 * p + r)   (0 when the denominator is zero)
 *
 * When verbose_ is set, the three counts and then p, r and f are written to
 * std::cerr.
 *
 * @param comps statistics vector; must have exactly NumberOfScores() entries.
 * @return the score f.
 * @throws std::runtime_error if comps.size() differs from NumberOfScores().
 */
float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
  if (comps.size() != NumberOfScores()) {
    // Format through a stream: adding an integer to a string literal is
    // pointer arithmetic and would truncate the message instead of appending
    // the expected size.
    std::ostringstream msg;
    msg << "Size of stat vector for M2Scorer is not " << NumberOfScores();
    throw runtime_error(msg.str());
  }

  const float beta = beta_;

  // A zero denominator means nothing was proposed (or nothing was expected);
  // the ratio is then defined as 1.
  float p = 1.0;
  if (comps[1] != 0)
    p = comps[0] / static_cast<double>(comps[1]);

  float r = 1.0;
  if (comps[2] != 0)
    r = comps[0] / static_cast<double>(comps[2]);

  float f = 0.0;
  const float denom = beta * beta * p + r;
  if (denom != 0)
    f = (1.0 + beta * beta) * p * r / denom;

  if (verbose_) {
    std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl;
    std::cerr << p << " " << r << " " << f << std::endl;
  }

  return f;
}
float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
{
return comps[3];
}
/**
 * Produces a random three-element stats vector {corr, prop, gold} * decay.
 *
 * gold and prop are each drawn as rand() % max. corr is drawn as
 * rand() % min(prop, gold) when that minimum is positive, and is 0 otherwise.
 * Each count is multiplied by decay before being stored.
 *
 * @param decay scale factor applied to every drawn count.
 * @param max   modulus for the gold/prop draws. When it is 0 there is no
 *              range to draw from, and an all-zero vector is returned without
 *              calling rand().
 * @return vector of size 3 holding the scaled corr, prop and gold values.
 */
std::vector<ScoreStatsType> randomStats(float decay, int max)
{
  std::vector<ScoreStatsType> stats(3, 0.0);

  // rand() % 0 is undefined behaviour, so bail out with zeroed stats.
  if (max == 0)
    return stats;

  // Draw order (gold, prop, then corr) is kept so seeded sequences match.
  const int gold = rand() % max;
  const int prop = rand() % max;

  // corr is bounded by the smaller of prop and gold.
  const int limit = std::min(prop, gold);
  int corr = 0;
  if (limit > 0)
    corr = rand() % limit;

  stats[0] = corr * decay;
  stats[1] = prop * decay;
  stats[2] = gold * decay;
  return stats;
}
float sentenceM2(const std::vector<ScoreStatsType>& stats)
{
float beta = 0.5;
std::vector<ScoreStatsType> smoothStats(3, 0.0); // = randomStats(0.001, 5);
smoothStats[0] += stats[0];
smoothStats[1] += stats[1];
smoothStats[2] += stats[2];
float p = 0.0;
float r = 0.0;
float f = 0.0;
if(smoothStats[1] != 0)
p = smoothStats[0] / smoothStats[1];
else
p = 1.0;
if(smoothStats[2] != 0)
r = smoothStats[0] / smoothStats[2];
else
r = 1.0;
float denom = beta * beta * p + r;
if(denom != 0)
f = (1.0 + beta * beta) * p * r / denom;
else
f = 0.0;
return f;
}
}
|