|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <vector>
#include "moses/Util.h"
#include "TargetPhraseCollection.h"
#include "Vocab.h"
#include "OnDiskWrapper.h"
|
|
|
using namespace std; |
|
|
|
namespace OnDiskPt |
|
{ |
|
|
|
// Out-of-class definition of the static member. The explicit 0 is the same
// value static storage would receive implicitly.
// NOTE(review): judging by the name this is the index of the score used when
// ordering phrases -- confirm against TargetPhraseOrderByScore.
size_t TargetPhraseCollection::s_sortScoreInd = 0;
|
|
|
// Builds a collection with no phrases in it. m_filePos starts out at the
// placeholder value 777; Save() replaces it with the real offset once the
// collection has been written.
TargetPhraseCollection::TargetPhraseCollection()
  : m_filePos(777)
{
}
|
|
|
// Copy constructor. Only the file position and the debug string are carried
// over; the phrase pointers held in m_coll are NOT copied, so the new object
// starts with no phrases.
// NOTE(review): presumably deliberate -- the collection deletes the phrases it
// holds, so sharing the raw pointers between two objects would free them
// twice. Confirm before changing.
TargetPhraseCollection::TargetPhraseCollection(const TargetPhraseCollection &copy)
  : m_filePos(copy.m_filePos)
  , m_debugStr(copy.m_debugStr)
{
}
|
|
|
// Hands m_coll to Moses::RemoveAllInColl for clean-up.
// NOTE(review): that helper presumably deletes every stored TargetPhrase and
// empties the container -- see moses/Util.h.
TargetPhraseCollection::~TargetPhraseCollection()
{
  Moses::RemoveAllInColl(m_coll);
}
|
|
|
// Appends targetPhrase to the end of the collection. The pointer itself is
// stored (no copy is made); Sort() may later delete it when trimming to a
// table limit.
void TargetPhraseCollection::AddTargetPhrase(TargetPhrase *targetPhrase)
{
  m_coll.insert(m_coll.end(), targetPhrase);
}
|
|
|
void TargetPhraseCollection::Sort(size_t tableLimit) |
|
{ |
|
std::sort(m_coll.begin(), m_coll.end(), TargetPhraseOrderByScore()); |
|
|
|
if (tableLimit && m_coll.size() > tableLimit) { |
|
CollType::iterator iter; |
|
for (iter = m_coll.begin() + tableLimit ; iter != m_coll.end(); ++iter) { |
|
delete *iter; |
|
} |
|
m_coll.resize(tableLimit); |
|
} |
|
} |
|
|
|
// Appends this collection to the target-collection file and records where it
// was written.
//
// Record layout, as assembled below: a uint64_t phrase count followed, for
// every phrase in m_coll order, by the bytes that
// TargetPhrase::WriteOtherInfoToMemory() returns. Each phrase is also asked to
// Save() itself through onDiskWrapper before its "other info" is collected.
//
// onDiskWrapper: supplies the target-collection stream and is forwarded to
//                the per-phrase save calls.
//
// On return m_filePos holds the offset at which the record starts. Running
// out of memory while building the record now surfaces as std::bad_alloc
// instead of writing through a null pointer.
void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
{
  std::fstream &file = onDiskWrapper.GetFileTargetColl();

  // The whole record is built in memory so it reaches the file in one write.
  // A vector replaces the former malloc/realloc buffer, whose results were
  // never checked and whose `mem = realloc(mem, ...)` lost the old block on
  // failure.
  const uint64_t numPhrases = GetSize();
  const char *countBytes = reinterpret_cast<const char*>(&numPhrases);
  std::vector<char> record(countBytes, countBytes + sizeof(numPhrases));

  for (CollType::iterator iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
    TargetPhrase &targetPhrase = **iter;
    targetPhrase.Save(onDiskWrapper);

    // The returned block is released with free() below, so it must not leak
    // if growing the record throws.
    size_t otherInfoSize;
    char *otherInfo = targetPhrase.WriteOtherInfoToMemory(onDiskWrapper, otherInfoSize);
    try {
      record.insert(record.end(), otherInfo, otherInfo + otherInfoSize);
    } catch (...) {
      free(otherInfo);
      throw;
    }
    free(otherInfo);
  }

  // Seek to the end BEFORE sampling the offset. The previous order (tellp,
  // then seekp) stored a wrong position whenever the put pointer was not
  // already at end-of-file.
  file.seekp(0, std::ios::end);
  const uint64_t startPos = file.tellp();
  file.write(&record[0], static_cast<std::streamsize>(record.size()));

#ifndef NDEBUG
  // Sanity check: the write advanced the stream by exactly the record size.
  const uint64_t endPos = file.tellp();
  assert(startPos + record.size() == endPos);
#endif
  m_filePos = startPos;
}
|
|
|
void TargetPhraseCollection::ReadFromFile(size_t tableLimit, uint64_t filePos, OnDiskWrapper &onDiskWrapper) |
|
{ |
|
fstream &fileTPColl = onDiskWrapper.GetFileTargetColl(); |
|
fstream &fileTP = onDiskWrapper.GetFileTargetInd(); |
|
|
|
size_t numScores = onDiskWrapper.GetNumScores(); |
|
|
|
|
|
uint64_t numPhrases; |
|
|
|
uint64_t currFilePos = filePos; |
|
fileTPColl.seekg(filePos); |
|
fileTPColl.read((char*) &numPhrases, sizeof(uint64_t)); |
|
|
|
|
|
if (tableLimit) { |
|
numPhrases = std::min(numPhrases, (uint64_t) tableLimit); |
|
} |
|
|
|
currFilePos += sizeof(uint64_t); |
|
|
|
for (size_t ind = 0; ind < numPhrases; ++ind) { |
|
TargetPhrase *tp = new TargetPhrase(numScores); |
|
|
|
uint64_t sizeOtherInfo = tp->ReadOtherInfoFromFile(currFilePos, fileTPColl); |
|
tp->ReadFromFile(fileTP); |
|
|
|
currFilePos += sizeOtherInfo; |
|
|
|
m_coll.push_back(tp); |
|
} |
|
} |
|
|
|
uint64_t TargetPhraseCollection::GetFilePos() const |
|
{ |
|
return m_filePos; |
|
} |
|
|
|
const std::string TargetPhraseCollection::GetDebugStr() const |
|
{ |
|
return m_debugStr; |
|
} |
|
|
|
void TargetPhraseCollection::SetDebugStr(const std::string &str) |
|
{ |
|
m_debugStr = str; |
|
} |
|
|
|
// Returns the phrase stored at position ind.
// ind must be smaller than GetSize(); this is only checked by an assert, so
// builds with NDEBUG perform no bounds check.
const TargetPhrase &TargetPhraseCollection::GetTargetPhrase(size_t ind) const
{
  assert(ind < GetSize());
  const TargetPhrase *phrase = m_coll[ind];
  return *phrase;
}
|
|
|
} |
|
|
|
|
|
|