|
#include "pcfg.h" |
|
|
|
#include <cassert> |
|
|
|
#include <boost/algorithm/string.hpp> |
|
#include <boost/lexical_cast.hpp> |
|
|
|
#include "syntax-common/exception.h" |
|
|
|
namespace MosesTraining { |
|
namespace Syntax { |
|
|
|
void Pcfg::Add(const Key &key, double score) { |
|
rules_[key] = score; |
|
} |
|
|
|
bool Pcfg::Lookup(const Key &key, double &score) const { |
|
Map::const_iterator p = rules_.find(key); |
|
if (p == rules_.end()) { |
|
return false; |
|
} |
|
score = p->second; |
|
return true; |
|
} |
|
|
|
void Pcfg::Read(std::istream &input, Vocabulary &vocab) { |
|
std::string line; |
|
std::string lhs_string; |
|
std::vector<std::string> rhs_strings; |
|
std::string score_string; |
|
Key key; |
|
while (std::getline(input, line)) { |
|
|
|
std::size_t pos = line.find("|||"); |
|
if (pos == std::string::npos) { |
|
throw Exception("missing first delimiter"); |
|
} |
|
lhs_string = line.substr(0, pos); |
|
boost::trim(lhs_string); |
|
|
|
|
|
std::size_t begin = pos+3; |
|
pos = line.find("|||", begin); |
|
if (pos == std::string::npos) { |
|
throw Exception("missing second delimiter"); |
|
} |
|
std::string rhs_text = line.substr(begin, pos-begin); |
|
boost::trim(rhs_text); |
|
rhs_strings.clear(); |
|
boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(), |
|
boost::algorithm::token_compress_on); |
|
|
|
|
|
score_string = line.substr(pos+3); |
|
boost::trim(score_string); |
|
|
|
|
|
key.clear(); |
|
key.reserve(rhs_strings.size()+1); |
|
key.push_back(vocab.Insert(lhs_string)); |
|
for (std::vector<std::string>::const_iterator p = rhs_strings.begin(); |
|
p != rhs_strings.end(); ++p) { |
|
key.push_back(vocab.Insert(*p)); |
|
} |
|
|
|
|
|
double score = boost::lexical_cast<double>(score_string); |
|
Add(key, score); |
|
} |
|
} |
|
|
|
void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const { |
|
for (const_iterator p = begin(); p != end(); ++p) { |
|
const Key &key = p->first; |
|
double score = p->second; |
|
std::vector<std::size_t>::const_iterator q = key.begin(); |
|
std::vector<std::size_t>::const_iterator end = key.end(); |
|
output << vocab.Lookup(*q++) << " |||"; |
|
while (q != end) { |
|
output << " " << vocab.Lookup(*q++); |
|
} |
|
output << " ||| " << score << std::endl; |
|
} |
|
} |
|
|
|
} |
|
} |
|
|