|
#include "OnDiskQuery.h" |
|
|
|
namespace OnDiskPt |
|
{ |
|
|
|
void OnDiskQuery::Tokenize(Phrase &phrase, |
|
const std::string &token, |
|
bool addSourceNonTerm, |
|
bool addTargetNonTerm) |
|
{ |
|
bool nonTerm = false; |
|
size_t tokSize = token.size(); |
|
int comStr =token.compare(0, 1, "["); |
|
|
|
if (comStr == 0) { |
|
comStr = token.compare(tokSize - 1, 1, "]"); |
|
nonTerm = comStr == 0; |
|
} |
|
|
|
if (nonTerm) { |
|
|
|
size_t splitPos = token.find_first_of("[", 2); |
|
std::string wordStr = token.substr(0, splitPos); |
|
|
|
if (splitPos == std::string::npos) { |
|
|
|
WordPtr word (new Word()); |
|
word->CreateFromString(wordStr, m_wrapper.GetVocab()); |
|
phrase.AddWord(word); |
|
} else { |
|
|
|
if (addSourceNonTerm) { |
|
WordPtr word( new Word()); |
|
word->CreateFromString(wordStr, m_wrapper.GetVocab()); |
|
phrase.AddWord(word); |
|
} |
|
|
|
wordStr = token.substr(splitPos, tokSize - splitPos); |
|
if (addTargetNonTerm) { |
|
WordPtr word(new Word()); |
|
word->CreateFromString(wordStr, m_wrapper.GetVocab()); |
|
phrase.AddWord(word); |
|
} |
|
|
|
} |
|
} else { |
|
|
|
WordPtr word(new Word()); |
|
word->CreateFromString(token, m_wrapper.GetVocab()); |
|
phrase.AddWord(word); |
|
} |
|
} |
|
|
|
SourcePhrase OnDiskQuery::Tokenize(const std::vector<std::string>& tokens) |
|
{ |
|
SourcePhrase sourcePhrase; |
|
if (tokens.size() > 0) { |
|
std::vector<std::string>::const_iterator token = tokens.begin(); |
|
for (; token + 1 != tokens.end(); ++token) { |
|
Tokenize(sourcePhrase, *token, true, true); |
|
} |
|
|
|
Tokenize(sourcePhrase, *token, false, true); |
|
} |
|
return sourcePhrase; |
|
} |
|
|
|
const PhraseNode* OnDiskQuery::Query(const SourcePhrase& sourcePhrase) |
|
{ |
|
const PhraseNode *node = &m_wrapper.GetRootSourceNode(); |
|
assert(node); |
|
|
|
for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos) { |
|
const Word &word = sourcePhrase.GetWord(pos); |
|
node = node->GetChild(word, m_wrapper); |
|
if (node == NULL) { |
|
break; |
|
} |
|
} |
|
return node; |
|
} |
|
|
|
} |
|
|