sakharamg
/

NMTKD

Model card Files Files and versions Community

NMTKD / translation /tools /mosesdecoder /mert /HopeFearDecoder.cpp

sakharamg

Uploading all files

158b61b about 2 years ago

raw

history blame contribute delete

14.7 kB

	/***********************************************************************
	Moses - factored phrase-based language decoder
	Copyright (C) 2014- University of Edinburgh

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	***********************************************************************/

	#include <algorithm>
	#include <cmath>
	#include <iterator>

	#define BOOST_FILESYSTEM_VERSION 3
	#include <boost/filesystem.hpp>
	#include <boost/lexical_cast.hpp>

	#include "util/exception.hh"
	#include "util/file_piece.hh"

	#include "Scorer.h"
	#include "HopeFearDecoder.h"

	using namespace std;
	namespace fs = boost::filesystem;

	namespace MosesTuning
	{

	static const ValType BLEU_RATIO = 5;

	std::pair<MiraWeightVector*,size_t>
	InitialiseWeights(const string& denseInitFile, const string& sparseInitFile,
	const string& type, bool verbose)
	{
	// Dense
	vector<parameter_t> initParams;
	if(!denseInitFile.empty()) {
	ifstream opt(denseInitFile.c_str());
	string buffer;
	if (opt.fail()) {
	cerr << "could not open dense initfile: " << denseInitFile << endl;
	exit(3);
	}
	if (verbose) cerr << "Reading dense features:" << endl;
	parameter_t val;
	getline(opt,buffer);
	if (buffer.find_first_of("=") == buffer.npos) {
	UTIL_THROW_IF(type == "hypergraph", util::Exception, "For hypergraph version, require dense features in 'name= value' format");
	cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
	istringstream strstrm(buffer);
	while(strstrm >> val) {
	initParams.push_back(val);
	if(verbose) cerr << val << endl;
	}
	} else {
	vector<string> names;
	string last_name = "";
	size_t feature_ctr = 1;
	do {
	size_t equals = buffer.find_last_of("=");
	UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
	<< buffer << "'");
	string name = buffer.substr(0,equals);
	names.push_back(name);
	initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));

	//Names for features with several values need to have their id added
	if (name != last_name) feature_ctr = 1;
	last_name = name;
	if (feature_ctr>1) {
	stringstream namestr;
	namestr << names.back() << "_" << feature_ctr;
	names[names.size()-1] = namestr.str();
	if (feature_ctr == 2) {
	stringstream namestr;
	namestr << names[names.size()-2] << "_" << (feature_ctr-1);
	names[names.size()-2] = namestr.str();
	}
	}
	++feature_ctr;

	} while(getline(opt,buffer));


	//Make sure that SparseVector encodes dense feature names as 0..n-1.
	for (size_t i = 0; i < names.size(); ++i) {
	size_t id = SparseVector::encode(names[i]);
	assert(id == i);
	if (verbose) cerr << names[i] << " " << initParams[i] << endl;
	}

	}

	opt.close();
	}
	size_t initDenseSize = initParams.size();
	// Sparse
	if(!sparseInitFile.empty()) {
	if(initDenseSize==0) {
	cerr << "sparse initialization requires dense initialization" << endl;
	exit(3);
	}
	ifstream opt(sparseInitFile.c_str());
	if(opt.fail()) {
	cerr << "could not open sparse initfile: " << sparseInitFile << endl;
	exit(3);
	}
	int sparseCount=0;
	parameter_t val;
	std::string name;
	while(opt >> name >> val) {
	size_t id = SparseVector::encode(name) + initDenseSize;
	while(initParams.size()<=id) initParams.push_back(0.0);
	initParams[id] = val;
	sparseCount++;
	}
	cerr << "Found " << sparseCount << " initial sparse features" << endl;
	opt.close();
	}

	return pair<MiraWeightVector*,size_t>(new MiraWeightVector(initParams), initDenseSize);
	}

	ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv)
	{
	vector<ValType> stats(scorer_->NumberOfScores(),0);
	for(reset(); !finished(); next()) {
	vector<ValType> sent;
	MaxModel(wv,&sent);
	for(size_t i=0; i<sent.size(); i++) {
	stats[i]+=sent[i];
	}
	}
	return scorer_->calculateScore(stats);
	}

	NbestHopeFearDecoder::NbestHopeFearDecoder(
	const vector<string>& featureFiles,
	const vector<string>& scoreFiles,
	bool streaming,
	bool no_shuffle,
	bool safe_hope,
	Scorer* scorer
	) : safe_hope_(safe_hope)
	{
	scorer_ = scorer;
	if (streaming) {
	train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
	} else {
	train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
	}
	}


	void NbestHopeFearDecoder::next()
	{
	train_->next();
	}

	bool NbestHopeFearDecoder::finished()
	{
	return train_->finished();
	}

	void NbestHopeFearDecoder::reset()
	{
	train_->reset();
	}

	void NbestHopeFearDecoder::HopeFear(
	const std::vector<ValType>& backgroundBleu,
	const MiraWeightVector& wv,
	HopeFearData* hopeFear
	)
	{


	// Hope / fear decode
	ValType hope_scale = 1.0;
	size_t hope_index=0, fear_index=0, model_index=0;
	ValType hope_score=0, fear_score=0, model_score=0;
	for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
	ValType hope_bleu=0, hope_model=0;
	for(size_t i=0; i< train_->cur_size(); i++) {
	const MiraFeatureVector& vec=train_->featuresAt(i);
	ValType score = wv.score(vec);
	ValType bleu = scorer_->calculateSentenceLevelBackgroundScore(train_->scoresAt(i),backgroundBleu);
	// Hope
	if(i==0 \|\| (hope_scale*score + bleu) > hope_score) {
	hope_score = hope_scale*score + bleu;
	hope_index = i;
	hope_bleu = bleu;
	hope_model = score;
	}
	// Fear
	if(i==0 \|\| (score - bleu) > fear_score) {
	fear_score = score - bleu;
	fear_index = i;
	}
	// Model
	if(i==0 \|\| score > model_score) {
	model_score = score;
	model_index = i;
	}
	}
	// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
	// where model score is having far more influence than BLEU
	hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
	if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
	hope_scale = abs(hope_bleu) / abs(hope_model);
	else break;
	}
	hopeFear->modelFeatures = train_->featuresAt(model_index);
	hopeFear->hopeFeatures = train_->featuresAt(hope_index);
	hopeFear->fearFeatures = train_->featuresAt(fear_index);

	hopeFear->hopeStats = train_->scoresAt(hope_index);
	hopeFear->hopeBleu = scorer_->calculateSentenceLevelBackgroundScore(hopeFear->hopeStats, backgroundBleu);
	const vector<float>& fear_stats = train_->scoresAt(fear_index);
	hopeFear->fearBleu = scorer_->calculateSentenceLevelBackgroundScore(fear_stats, backgroundBleu);

	hopeFear->modelStats = train_->scoresAt(model_index);
	hopeFear->hopeFearEqual = (hope_index == fear_index);
	}

	void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats)
	{
	// Find max model
	size_t max_index=0;
	ValType max_score=0;
	for(size_t i=0; i<train_->cur_size(); i++) {
	MiraFeatureVector vec(train_->featuresAt(i));
	ValType score = wv.score(vec);
	if(i==0 \|\| score > max_score) {
	max_index = i;
	max_score = score;
	}
	}
	*stats = train_->scoresAt(max_index);
	}



	HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
	(
	const string& hypergraphDir,
	const vector<string>& referenceFiles,
	size_t num_dense,
	bool streaming,
	bool no_shuffle,
	bool safe_hope,
	size_t hg_pruning,
	const MiraWeightVector& wv,
	Scorer* scorer
	) :
	num_dense_(num_dense)
	{

	UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
	UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
	UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
	references_.Load(referenceFiles, vocab_);

	SparseVector weights;
	wv.ToSparse(&weights,num_dense_);
	scorer_ = scorer;

	static const string kWeights = "weights";
	fs::directory_iterator dend;
	size_t fileCount = 0;

	cerr << "Reading hypergraphs" << endl;
	for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
	const fs::path& hgpath = di->path();
	if (hgpath.filename() == kWeights) continue;
	// cerr << "Reading " << hgpath.filename() << endl;
	Graph graph(vocab_);
	size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
	util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
	//util::FilePiece file(di->path().string().c_str());
	util::FilePiece file(fd.release());
	ReadGraph(file,graph);

	//cerr << "ref length " << references_.Length(id) << endl;
	size_t edgeCount = hg_pruning * references_.Length(id);
	boost::shared_ptr<Graph> prunedGraph;
	prunedGraph.reset(new Graph(vocab_));
	graph.Prune(prunedGraph.get(), weights, edgeCount);
	graphs_[id] = prunedGraph;
	// cerr << "Pruning to v=" << graphs_[id]->VertexSize() << " e=" << graphs_[id]->EdgeSize() << endl;
	++fileCount;
	if (fileCount % 10 == 0) cerr << ".";
	if (fileCount % 400 == 0) cerr << " [count=" << fileCount << "]\n";
	}
	cerr << endl << "Done" << endl;

	sentenceIds_.resize(graphs_.size());
	for (size_t i = 0; i < graphs_.size(); ++i) sentenceIds_[i] = i;
	if (!no_shuffle) {
	random_shuffle(sentenceIds_.begin(), sentenceIds_.end());
	}

	}

	void HypergraphHopeFearDecoder::reset()
	{
	sentenceIdIter_ = sentenceIds_.begin();
	}

	void HypergraphHopeFearDecoder::next()
	{
	sentenceIdIter_++;
	}

	bool HypergraphHopeFearDecoder::finished()
	{
	return sentenceIdIter_ == sentenceIds_.end();
	}

	void HypergraphHopeFearDecoder::HopeFear(
	const vector<ValType>& backgroundBleu,
	const MiraWeightVector& wv,
	HopeFearData* hopeFear
	)
	{
	size_t sentenceId = *sentenceIdIter_;
	SparseVector weights;
	wv.ToSparse(&weights, num_dense_);
	const Graph& graph = *(graphs_[sentenceId]);

	// ValType hope_scale = 1.0;
	HgHypothesis hopeHypo, fearHypo, modelHypo;
	for(size_t safe_loop=0; safe_loop<2; safe_loop++) {

	//hope decode
	Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);

	//fear decode
	Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);

	//Model decode
	Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);


	// Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
	// where model score is having far more influence than BLEU
	// hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
	// if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
	// hope_scale = abs(hope_bleu) / abs(hope_model);
	// else break;
	//TODO: Don't currently get model and bleu so commented this out for now.
	break;
	}
	//modelFeatures, hopeFeatures and fearFeatures
	hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
	hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
	hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);

	//Need to know which are to be mapped to dense features!

	//Only C++11
	//hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
	vector<ValType> fearStats(scorer_->NumberOfScores());
	hopeFear->hopeStats.reserve(scorer_->NumberOfScores());
	hopeFear->modelStats.reserve(scorer_->NumberOfScores());
	for (size_t i = 0; i < fearStats.size(); ++i) {
	hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
	hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);

	fearStats[i] = fearHypo.bleuStats[i];
	}
	/*
	cerr << "hope" << endl;;
	for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
	cerr << hopeHypo.text[i]->first << " ";
	}
	cerr << endl;
	for (size_t i = 0; i < fearStats.size(); ++i) {
	cerr << hopeHypo.bleuStats[i] << " ";
	}
	cerr << endl;
	cerr << "fear";
	for (size_t i = 0; i < fearHypo.text.size(); ++i) {
	cerr << fearHypo.text[i]->first << " ";
	}
	cerr << endl;
	for (size_t i = 0; i < fearStats.size(); ++i) {
	cerr << fearHypo.bleuStats[i] << " ";
	}
	cerr << endl;
	cerr << "model";
	for (size_t i = 0; i < modelHypo.text.size(); ++i) {
	cerr << modelHypo.text[i]->first << " ";
	}
	cerr << endl;
	for (size_t i = 0; i < fearStats.size(); ++i) {
	cerr << modelHypo.bleuStats[i] << " ";
	}
	cerr << endl;
	*/
	hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
	hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);

	//If fv and bleu stats are equal, then assume equal
	hopeFear->hopeFearEqual = true; //(hopeFear->hopeBleu - hopeFear->fearBleu) >= 1e-8;
	if (hopeFear->hopeFearEqual) {
	for (size_t i = 0; i < fearStats.size(); ++i) {
	if (fearStats[i] != hopeFear->hopeStats[i]) {
	hopeFear->hopeFearEqual = false;
	break;
	}
	}
	}
	hopeFear->hopeFearEqual = hopeFear->hopeFearEqual && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
	}

	void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats)
	{
	assert(!finished());
	HgHypothesis bestHypo;
	size_t sentenceId = *sentenceIdIter_;
	SparseVector weights;
	wv.ToSparse(&weights, num_dense_);
	vector<ValType> bg(scorer_->NumberOfScores());
	//cerr << "Calculating bleu on " << sentenceId << endl;
	Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
	stats->resize(bestHypo.bleuStats.size());
	/*
	for (size_t i = 0; i < bestHypo.text.size(); ++i) {
	cerr << bestHypo.text[i]->first << " ";
	}
	cerr << endl;
	*/
	for (size_t i = 0; i < bestHypo.bleuStats.size(); ++i) {
	(*stats)[i] = bestHypo.bleuStats[i];
	}
	}



	};