File size: 2,129 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
#include "TreeFragmentTokenizer.h"
#include <cctype>
namespace Moses
{
namespace Syntax
{
namespace F2S
{
// Builds a token by storing the three arguments verbatim.
//
//   t - kind of token, stored in `type`
//   v - token text, stored in `value`
//   p - stored in `pos`; the tokenizer in this file passes the offset at which
//       the token starts (or the end offset for an end-of-sequence token)
TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
                                     StringPiece v,
                                     std::size_t p)
  : type(t), value(v), pos(p)
{
  // All state is set in the initialiser list.
}
// Constructs a tokenizer whose current token is end-of-sequence (EOS).
//
// operator== in this file treats any two tokenizers whose current token is
// EOS as equal, so a default-constructed instance can serve as the "end"
// value when iterating.
//
// The EOS token's position is given as -1, which converts to the largest
// std::size_t value because the token constructor takes a std::size_t.
//
// The iterators and the offset are given definite values (value-initialised
// iterators, zero offset) so that copying this object never reads
// indeterminate members and so that operator++ sees iter_ == end_ and simply
// stays at EOS.
TreeFragmentTokenizer::TreeFragmentTokenizer()
  : value_(TreeFragmentToken_EOS, "", -1)
  , iter_()
  , end_()
  , pos_(0)
{
}
// Constructs a tokenizer over the text `s` and immediately reads the first
// token, so the current token is valid as soon as construction finishes
// (it is EOS when `s` is empty or contains only spaces and tabs).
//
// value_ starts out as an EOS placeholder whose position is the largest
// std::size_t value; the call to operator++() below overwrites it.
TreeFragmentTokenizer::TreeFragmentTokenizer(const StringPiece &s)
  : str_(s)
  , value_(TreeFragmentToken_EOS, "", static_cast<std::size_t>(-1))
  , iter_(s.begin())
  , end_(s.end())
  , pos_(0)
{
  // Prime the tokenizer with the first token.
  this->operator++();
}
// Pre-increment: advances to the next token and returns *this.
//
// Leading spaces and tabs are skipped. After that:
//   - if the input is exhausted, the current token becomes EOS with an empty
//     value and the current offset as its position;
//   - '[' and ']' each produce a one-character LSB / RSB token;
//   - any other character starts a WORD token that runs up to, but does not
//     include, the next space, tab, '[', ']' or the end of the input.
// pos_ is advanced in lockstep with iter_, and each token records the value
// pos_ had at the token's first character.
TreeFragmentTokenizer &TreeFragmentTokenizer::operator++()
{
  // Discard blanks in front of the token.
  for (; iter_ != end_; ++iter_, ++pos_) {
    if (*iter_ != ' ' && *iter_ != '\t') {
      break;
    }
  }

  // Only blanks (or nothing) remained.
  if (iter_ == end_) {
    value_ = TreeFragmentToken(TreeFragmentToken_EOS, "", pos_);
    return *this;
  }

  const std::size_t tokenStart = pos_;

  // Brackets are always single-character tokens.
  switch (*iter_) {
  case '[':
    value_ = TreeFragmentToken(TreeFragmentToken_LSB, "[", tokenStart);
    ++iter_;
    ++pos_;
    return *this;
  case ']':
    value_ = TreeFragmentToken(TreeFragmentToken_RSB, "]", tokenStart);
    ++iter_;
    ++pos_;
    return *this;
  default:
    break;
  }

  // The first character is already known to belong to the word, so consume
  // it before looking for a delimiter.
  do {
    ++iter_;
    ++pos_;
  } while (iter_ != end_ && *iter_ != ' ' && *iter_ != '\t' &&
           *iter_ != '[' && *iter_ != ']');

  value_ = TreeFragmentToken(TreeFragmentToken_WORD,
                             str_.substr(tokenStart, pos_ - tokenStart),
                             tokenStart);
  return *this;
}
// Post-increment: advances to the next token but returns a copy of the
// tokenizer as it was before advancing.
TreeFragmentTokenizer TreeFragmentTokenizer::operator++(int)
{
  TreeFragmentTokenizer previous = *this;
  operator++();
  return previous;
}
// Equality of two tokenizers.
//
// If either side's current token is EOS, the result is true only when both
// are at EOS; the underlying iterators are not consulted in that case.
// Otherwise the two are equal exactly when their input iterators are equal.
bool operator==(const TreeFragmentTokenizer &lhs,
                const TreeFragmentTokenizer &rhs)
{
  const bool lhsAtEnd = (lhs.value_.type == TreeFragmentToken_EOS);
  const bool rhsAtEnd = (rhs.value_.type == TreeFragmentToken_EOS);

  if (lhsAtEnd || rhsAtEnd) {
    return lhsAtEnd && rhsAtEnd;
  }
  return lhs.iter_ == rhs.iter_;
}
// Inequality of two tokenizers: the exact negation of operator==.
bool operator!=(const TreeFragmentTokenizer &lhs,
                const TreeFragmentTokenizer &rhs)
{
  const bool same = (lhs == rhs);
  return !same;
}
} // namespace F2S
} // namespace Syntax
} // namespace Moses
|