File size: 1,059 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
#include "LexicalTable.h"
#include "util/tokenize_piece.hh"
#include <cstdlib>
#include <iostream>
namespace MosesTraining
{
namespace Syntax
{
namespace ScoreStsg
{
// Constructs an empty lexical table tied to the given vocabularies.
//
// srcVocab - vocabulary that Load() inserts source-side words into.
// tgtVocab - vocabulary that Load() inserts target-side words into.
//
// NOTE(review): m_srcVocab / m_tgtVocab are presumably reference members
// (declared in LexicalTable.h), in which case both vocabularies must outlive
// this object - confirm against the header.
LexicalTable::LexicalTable(Vocabulary &srcVocab, Vocabulary &tgtVocab)
  : m_srcVocab(srcVocab), m_tgtVocab(tgtVocab) {}
void LexicalTable::Load(std::istream &input)
{
const util::AnyCharacter delimiter(" \t");
std::string line;
std::string tmp;
int i = 0;
while (getline(input, line)) {
++i;
if (i%100000 == 0) {
std::cerr << ".";
}
util::TokenIter<util::AnyCharacter> it(line, delimiter);
// Target word
it->CopyToString(&tmp);
Vocabulary::IdType tgtId = m_tgtVocab.Insert(tmp);
++it;
// Source word.
it->CopyToString(&tmp);
Vocabulary::IdType srcId = m_srcVocab.Insert(tmp);
++it;
// Probability.
it->CopyToString(&tmp);
double prob = atof(tmp.c_str());
m_table[srcId][tgtId] = prob;
}
std::cerr << std::endl;
}
} // namespace ScoreStsg
} // namespace Syntax
} // namespace MosesTraining
|