|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cassert> |
|
#include <boost/foreach.hpp> |
|
#include "MSPT.h" |
|
#include "../../PhraseBased/PhraseImpl.h" |
|
#include "../../Phrase.h" |
|
#include "../../System.h" |
|
#include "../../Scores.h" |
|
#include "../../InputPathsBase.h" |
|
#include "../../legacy/InputFileStream.h" |
|
#include "util/exception.hh" |
|
|
|
#include "../../PhraseBased/InputPath.h" |
|
#include "../../PhraseBased/TargetPhraseImpl.h" |
|
#include "../../PhraseBased/TargetPhrases.h" |
|
|
|
#include "../../SCFG/PhraseImpl.h" |
|
#include "../../SCFG/TargetPhraseImpl.h" |
|
#include "../../SCFG/InputPath.h" |
|
#include "../../SCFG/Stack.h" |
|
#include "../../SCFG/Stacks.h" |
|
#include "../../SCFG/Manager.h" |
|
|
|
|
|
using namespace std; |
|
|
|
namespace Moses2 |
|
{ |
|
|
|
|
|
|
|
|
|
// Construct the phrase table from its configuration line.
//
// Both arguments are forwarded unchanged to the PhraseTable base class.
// The two trie roots start out NULL; Load() allocates the one that matches
// the system's decoding mode.
MSPT::MSPT(size_t startInd, const std::string &line)
  : PhraseTable(startInd, line),
    m_rootPb(NULL),
    m_rootSCFG(NULL)
{
  ReadParameters();
}
|
|
|
// Free the trie roots owned by this table.
//
// A root that Load() never allocated is still NULL from the constructor,
// and deleting a null pointer is a no-op, so no mode check is needed here.
MSPT::~MSPT()
{
  delete m_rootPb;
  delete m_rootSCFG;
}
|
|
|
// Read the phrase table file at m_path and build the in-memory trie.
//
// Depending on system.isPb, either the phrase-based root (m_rootPb) or the
// SCFG root (m_rootSCFG) is allocated and filled. Every input line is split
// on "|||" into fields:
//   toks[0]  source phrase
//   toks[1]  target phrase
//   toks[2]  scores
//   toks[3]  alignment info (optional for phrase-based, required for SCFG)
// Any further fields are ignored.
//
// After all lines are read, the populated trie is sorted and pruned with
// m_tableLimit.
//
// Raises (via UTIL_THROW_IF2) with "Wrong format" when a line has fewer than
// three fields, or fewer than four fields in SCFG mode.
void MSPT::Load(System &system)
{
  FactorCollection &vocab = system.GetVocab();
  MemPool &systemPool = system.GetSystemPool();

  // Source phrases are built in a pool local to this call; target phrases go
  // into the system pool.
  // NOTE(review): presumably the trie does not keep pointers into
  // tmpSourcePool after AddRule() returns - confirm.
  MemPool tmpSourcePool;

  if (system.isPb) {
    m_rootPb = new PBNODE();
  } else {
    m_rootSCFG = new SCFGNODE();
  }

  vector<string> toks;
  size_t lineNum = 0;
  InputFileStream strme(m_path);
  string line;
  while (getline(strme, line)) {
    // Progress indicator: one number on stderr per million lines.
    if (++lineNum % 1000000 == 0) {
      cerr << lineNum << " ";
    }

    toks.clear();
    TokenizeMultiCharSeparator(toks, line, "|||");
    UTIL_THROW_IF2(toks.size() < 3, "Wrong format");

    if (system.isPb) {
      PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
                           toks[0]);
      TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
                                 toks[1]);
      target->GetScores().CreateFromString(toks[2], *this, system, true);

      // The alignment field is optional for phrase-based tables.
      if (toks.size() >= 4) {
        target->SetAlignmentInfo(toks[3]);
      }

      system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
          *target);
      m_rootPb->AddRule(m_input, *source, target);
    } else {
      // The SCFG branch always consumes toks[3]. Reject short lines up front
      // instead of indexing past the end of the vector (undefined behaviour).
      UTIL_THROW_IF2(toks.size() < 4, "Wrong format");

      SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
                                 toks[0]);
      SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
                                       system, toks[1]);
      target->GetScores().CreateFromString(toks[2], *this, system, true);
      target->SetAlignmentInfo(toks[3]);

      system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
          *target);
      m_rootSCFG->AddRule(m_input, *source, target);
    }
  }

  if (system.isPb) {
    m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
  } else {
    m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
  }
}
|
|
|
// Phrase-based lookup: search the phrase-based trie for the source span
// carried by inputPath and return whatever Find() yields.
//
// mgr and pool are not used by this implementation.
// NOTE(review): m_rootPb is only allocated by Load() when system.isPb is
// set; this method dereferences it without a check.
TargetPhrases* MSPT::Lookup(const Manager &mgr, MemPool &pool,
                            InputPath &inputPath) const
{
  const SubPhrase<Moses2::Word> &sourceSpan = inputPath.subPhrase;
  return m_rootPb->Find(m_input, sourceSpan);
}
|
|
|
// Seed `path` with an active chart entry that points at the root of the SCFG
// trie, registered under this table's index (GetPtInd()).
//
// The entry is constructed with placement new inside storage obtained from
// `pool`. mgr is not used by this implementation.
void MSPT::InitActiveChart(
  MemPool &pool,
  const SCFG::Manager &mgr,
  SCFG::InputPath &path) const
{
  const size_t tableIndex = GetPtInd();

  void *storage = pool.Allocate<ActiveChartEntryMem>();
  ActiveChartEntryMem *rootEntry = new (storage) ActiveChartEntryMem(pool, *m_rootSCFG);

  path.AddActiveChartEntry(tableIndex, rootEntry);
}
|
|
|
// SCFG lookup for one input path.
//
// Does nothing when the path covers more than maxChartSpan words. Otherwise:
//   1. calls LookupGivenWord() once, with the immediate prefix path, the last
//      word of this path's sub-phrase, and the range of the input path fetched
//      with GetValue(endPos, 1);
//   2. walks the whole prefixPath chain and calls LookupNT() for each prefix,
//      with the range that starts right after that prefix and ends at endPos.
//
// Raises (via UTIL_THROW_IF2) when path.prefixPath is NULL.
void MSPT::Lookup(MemPool &pool,
                  const SCFG::Manager &mgr,
                  size_t maxChartSpan,
                  const SCFG::Stacks &stacks,
                  SCFG::InputPath &path) const
{
  if (path.range.GetNumWordsCovered() > maxChartSpan) {
    return;
  }

  const size_t endPos = path.range.GetEndPos();

  const SCFG::InputPath *firstPrefix = static_cast<const SCFG::InputPath*>(path.prefixPath);
  UTIL_THROW_IF2(firstPrefix == NULL, "prefixPath == NULL");

  // Step 1: the last word of the span.
  // NOTE(review): the NULL argument lands in what LookupGivenNode() calls
  // `hypos`; presumably "no hypotheses" for a terminal - confirm.
  const SCFG::Word &lastWord = path.subPhrase.Back();
  const SCFG::InputPath &lastWordPath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1);
  LookupGivenWord(pool, mgr, *firstPrefix, lastWord, NULL, lastWordPath.range, path);

  // Step 2: every prefix in the chain, from the immediate one outwards.
  for (const SCFG::InputPath *prefix = firstPrefix;
       prefix;
       prefix = static_cast<const SCFG::InputPath*>(prefix->prefixPath)) {
    // The remaining span begins one position past the prefix's end and runs
    // up to and including endPos.
    const size_t startPos = prefix->range.GetEndPos() + 1;
    const size_t ntSize = endPos - startPos + 1;
    const SCFG::InputPath &ntPath = *mgr.GetInputPaths().GetMatrix().GetValue(startPos, ntSize);

    LookupNT(pool, mgr, ntPath.range, *prefix, stacks, path);
  }
}
|
|
|
// Try to extend an active chart entry by one symbol.
//
// Looks up wordSought among the children of the trie node held by prevEntry.
// If a child exists, a new ActiveChartEntryMem for that child is constructed
// in `pool`, given a symbol-bind element for (subPhraseRange, wordSought,
// hypos), and added to outPath under this table's index. If the child node
// also has target phrases, they are added to outPath together with the new
// entry's symbol bind. When no child matches, nothing happens.
//
// prevEntry must actually be an ActiveChartEntryMem; it is downcast with
// static_cast and no runtime check.
void MSPT::LookupGivenNode(
  MemPool &pool,
  const SCFG::Manager &mgr,
  const SCFG::ActiveChartEntry &prevEntry,
  const SCFG::Word &wordSought,
  const Moses2::Hypotheses *hypos,
  const Moses2::Range &subPhraseRange,
  SCFG::InputPath &outPath) const
{
  const ActiveChartEntryMem &prevEntryCast = static_cast<const ActiveChartEntryMem&>(prevEntry);

  // prevNode is a reference, so its address is never null in well-defined
  // C++. The former `&prevNode == NULL` check could never fire and has been
  // dropped.
  const SCFGNODE &prevNode = prevEntryCast.node;

  const size_t ptInd = GetPtInd();
  const SCFGNODE *nextNode = prevNode.Find(m_input, wordSought);
  if (nextNode == NULL) {
    return;
  }

  ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *nextNode, prevEntry);
  chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this);
  outPath.AddActiveChartEntry(ptInd, chartEntry);

  const SCFG::TargetPhrases *tps = nextNode->GetTargetPhrases();
  if (tps == NULL) {
    return;
  }

  outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
}
|
|
|
} |
|
|
|
|