|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cstring>

#include <boost/algorithm/string/predicate.hpp>

#include "moses/Util.h"
#include "Word.h"

#include "util/tokenize_piece.hh"
#include "util/exception.hh"
|
|
|
using namespace std; |
|
using namespace boost::algorithm; |
|
|
|
namespace OnDiskPt |
|
{ |
|
|
|
/**
 * Copy constructor: duplicates the non-terminal flag and the vocabulary id
 * of another word.
 *
 * @param copy the word to copy from.
 */
Word::Word(const Word &copy)
  :m_isNonTerminal(copy.m_isNonTerminal)
  ,m_vocabId(copy.m_vocabId)
{}
|
|
|
/** Destructor. The body is empty: nothing is released here. */
Word::~Word()
{
}
|
|
|
void Word::CreateFromString(const std::string &inString, Vocab &vocab) |
|
{ |
|
if (starts_with(inString, "[") && ends_with(inString, "]")) { |
|
|
|
m_isNonTerminal = true; |
|
string str = inString.substr(1, inString.size() - 2); |
|
m_vocabId = vocab.AddVocabId(str); |
|
} else { |
|
m_isNonTerminal = false; |
|
m_vocabId = vocab.AddVocabId(inString); |
|
} |
|
|
|
} |
|
|
|
size_t Word::WriteToMemory(char *mem) const |
|
{ |
|
uint64_t *vocabMem = (uint64_t*) mem; |
|
vocabMem[0] = m_vocabId; |
|
|
|
size_t size = sizeof(uint64_t); |
|
|
|
|
|
char bNonTerm = (char) m_isNonTerminal; |
|
mem[size] = bNonTerm; |
|
++size; |
|
|
|
return size; |
|
} |
|
|
|
size_t Word::ReadFromMemory(const char *mem) |
|
{ |
|
uint64_t *vocabMem = (uint64_t*) mem; |
|
m_vocabId = vocabMem[0]; |
|
|
|
size_t memUsed = sizeof(uint64_t); |
|
|
|
|
|
char bNonTerm; |
|
bNonTerm = mem[memUsed]; |
|
m_isNonTerminal = (bool) bNonTerm; |
|
++memUsed; |
|
|
|
return memUsed; |
|
} |
|
|
|
size_t Word::ReadFromFile(std::fstream &file) |
|
{ |
|
const size_t memAlloc = sizeof(uint64_t) + sizeof(char); |
|
char mem[sizeof(uint64_t) + sizeof(char)]; |
|
file.read(mem, memAlloc); |
|
|
|
size_t memUsed = ReadFromMemory(mem); |
|
assert(memAlloc == memUsed); |
|
|
|
return memAlloc; |
|
} |
|
|
|
int Word::Compare(const Word &compare) const |
|
{ |
|
int ret; |
|
|
|
if (m_isNonTerminal != compare.m_isNonTerminal) |
|
return m_isNonTerminal ?-1 : 1; |
|
|
|
if (m_vocabId < compare.m_vocabId) |
|
ret = -1; |
|
else if (m_vocabId > compare.m_vocabId) |
|
ret = 1; |
|
else |
|
ret = 0; |
|
|
|
return ret; |
|
} |
|
|
|
bool Word::operator<(const Word &compare) const |
|
{ |
|
int ret = Compare(compare); |
|
return ret < 0; |
|
} |
|
|
|
bool Word::operator==(const Word &compare) const |
|
{ |
|
int ret = Compare(compare); |
|
return ret == 0; |
|
} |
|
|
|
/**
 * Write the string that the vocabulary returns for this word's id.
 *
 * @param out   destination stream.
 * @param vocab vocabulary queried with GetString() for the stored id.
 */
void Word::DebugPrint(ostream &out, const Vocab &vocab) const
{
  out << vocab.GetString(m_vocabId);
}
|
|
|
std::ostream& operator<<(std::ostream &out, const Word &word) |
|
{ |
|
out << "("; |
|
out << word.m_vocabId; |
|
|
|
out << (word.m_isNonTerminal ? "n" : "t"); |
|
out << ")"; |
|
|
|
return out; |
|
} |
|
} |
|
|