|
#include "RuleTrieLoader.h" |
|
|
|
#include <sys/stat.h> |
|
#include <cstdlib> |
|
|
|
#include <fstream> |
|
#include <string> |
|
#include <iterator> |
|
#include <algorithm> |
|
#include <iostream> |
|
#include <cmath> |
|
|
|
#include "moses/FactorCollection.h" |
|
#include "moses/Word.h" |
|
#include "moses/Util.h" |
|
#include "moses/Timer.h" |
|
#include "moses/InputFileStream.h" |
|
#include "moses/StaticData.h" |
|
#include "moses/Range.h" |
|
#include "moses/ChartTranslationOptionList.h" |
|
#include "moses/FactorCollection.h" |
|
#include "moses/Syntax/RuleTableFF.h" |
|
#include "util/file_piece.hh" |
|
#include "util/string_piece.hh" |
|
#include "util/tokenize_piece.hh" |
|
#include "util/double-conversion/double-conversion.h" |
|
#include "util/exception.hh" |
|
|
|
#include "RuleTrie.h" |
|
#include "moses/parameters/AllOptions.h" |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
namespace S2T |
|
{ |
|
|
|
// Reads a text rule table from inFile and adds one target phrase per accepted
// line to trie.
//
// Each line is split on "|||" and the fields are consumed in order:
//   1. source phrase string
//   2. target phrase string
//   3. whitespace-separated scores
//   4. (optional) string handed to TargetPhrase::SetAlignmentInfo
//   5. skipped without being read (presumably the counts field - TODO confirm)
//   6. (optional) string handed to TargetPhrase::SetSparseScore
//   7. (optional) string handed to TargetPhrase::SetProperties
//
// Parameters:
//   opts   - only opts.unk.word_deletion_enabled is consulted; when it is
//            false, lines whose source field is blank are skipped.
//   input  - factor types forwarded to CreateFromString for the source side.
//   output - factor types forwarded to CreateFromString for the target side.
//   inFile - path opened with util::FilePiece.
//   ff     - supplies the expected number of score components, the features
//            to apply, the table limit and the path printed in diagnostics.
//   trie   - rule trie that receives the loaded target phrases.
//
// Returns true once the end of the file has been reached. Throws (through
// UTIL_THROW_IF2 / UTIL_THROW2) if a score does not parse or if the number of
// scores on a line differs from ff.GetNumScoreComponents().
bool RuleTrieLoader::Load(Moses::AllOptions const& opts,
                          const std::vector<FactorType> &input,
                          const std::vector<FactorType> &output,
                          const std::string &inFile,
                          const RuleTableFF &ff,
                          RuleTrie &trie)
{
  PrintUserTime(std::string("Start loading text phrase table. Moses format"));

  // 1-based number of the line currently being processed. It is used only in
  // diagnostics and is advanced exactly once per line read, so skipped lines
  // do not make later messages point at the wrong line.
  std::size_t count = 0;

  // Progress output goes to stderr only when the IFVERBOSE(1) guard passes.
  std::ostream *progress = NULL;
  IFVERBOSE(1) progress = &std::cerr;
  util::FilePiece in(inFile.c_str(), progress);

  // Declared outside the loop so the same objects are reused for every line.
  std::vector<float> scoreVector;
  StringPiece line;

  // The two NAN arguments are the values returned for empty and for
  // unparseable input, which is what the std::isnan() check below relies on.
  double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");

  while(true) {
    try {
      line = in.ReadLine();
    } catch (const util::EndOfFileException &e) {
      // End of file is the normal way out of the loop.
      break;
    }
    ++count;

    util::TokenIter<util::MultiCharacter> pipes(line, "|||");
    StringPiece sourcePhraseString(*pipes);
    StringPiece targetPhraseString(*++pipes);
    StringPiece scoreString(*++pipes);

    // Fourth field is optional; alignString stays empty when it is absent.
    StringPiece alignString;
    if (++pipes) {
      StringPiece temp(*pipes);
      alignString = temp;
    }

    // NOTE(review): the check inspects the *source* field while the message
    // says "empty target" - confirm which of the two is intended.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
    if (isLHSEmpty && !opts.unk.word_deletion_enabled) {
      TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    // Parse the scores; each one is passed through TransformScore and then
    // FloorScore before being stored.
    scoreVector.clear();
    for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
      int processed;
      float score = converter.StringToFloat(s->data(), s->length(), &processed);
      UTIL_THROW_IF2(std::isnan(score), "Bad score " << *s << " on line " << count);
      scoreVector.push_back(FloorScore(TransformScore(score)));
    }
    const size_t numScoreComponents = ff.GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
                  << numScoreComponents << ") of score components on line " << count);
    }

    // Both pointers are out-parameters of CreateFromString. They start as NULL
    // so that neither is ever read while holding an indeterminate value.
    Word *sourceLHS = NULL;
    Word *targetLHS = NULL;

    // Build the target side.
    TargetPhrase *targetPhrase = new TargetPhrase(&ff);
    targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);

    // Build the source side.
    Phrase sourcePhrase;
    sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);

    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);

    // Step over the fifth field; its content is never read here.
    ++pipes;

    if (++pipes) {
      StringPiece sparseString(*pipes);
      targetPhrase->SetSparseScore(&ff, sparseString);
    }

    if (++pipes) {
      StringPiece propertiesString(*pipes);
      targetPhrase->SetProperties(propertiesString);
    }

    targetPhrase->GetScoreBreakdown().Assign(&ff, scoreVector);
    targetPhrase->EvaluateInIsolation(sourcePhrase, ff.GetFeaturesToApply());

    // Look up (or create) the collection for this source side and add the new
    // phrase. The raw pointer is not deleted here, so the collection
    // presumably takes ownership - TODO confirm.
    TargetPhraseCollection::shared_ptr phraseColl
    = GetOrCreateTargetPhraseCollection(trie, sourcePhrase,
                                        *targetPhrase, sourceLHS);
    phraseColl->Add(targetPhrase);

    // sourceLHS is released here; targetLHS was handed to the target phrase
    // via SetTargetLHS above and is not deleted in this function.
    delete sourceLHS;
  }

  // A table limit of zero means no pruning pass is run.
  if (ff.GetTableLimit()) {
    SortAndPrune(trie, ff.GetTableLimit());
  }

  return true;
}
|
|
|
} |
|
} |
|
} |
|
|