|
#include "TreeFragmentTokenizer.h" |
|
|
|
#include <cctype> |
|
|
|
namespace Moses |
|
{ |
|
namespace Syntax |
|
{ |
|
namespace F2S |
|
{ |
|
|
|
// Builds a token by storing the three arguments unchanged.
//
//   t - the kind of token (stored in `type`)
//   v - the token's text  (stored in `value`)
//   p - the position associated with the token (stored in `pos`)
TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t, StringPiece v,
                                     std::size_t p)
  : type(t),
    value(v),
    pos(p)
{
  // All members are set in the initialiser list; nothing else to do.
}
|
|
|
// Constructs a tokenizer that is not attached to any input.  Its current
// token is EOS, and operator== treats any two tokenizers whose current token
// is EOS as equal, so a default-constructed tokenizer compares equal to one
// that has run off the end of its input.
//
// The iterators and the offset are value-initialised so that copying this
// object, or applying operator++ to it, never reads an indeterminate value:
// with iter_ == end_ an increment simply yields EOS again.
TreeFragmentTokenizer::TreeFragmentTokenizer()
  : value_(TreeFragmentToken_EOS, "", static_cast<std::size_t>(-1))
  , iter_()
  , end_()
  , pos_(0)
{
}
|
|
|
// Constructs a tokenizer over the string `s` and immediately reads the first
// token, so the current token is valid as soon as construction finishes
// (it is EOS if `s` holds no tokens).
//
// `s` is kept as a StringPiece; word tokens produced later are sub-pieces
// of it.
TreeFragmentTokenizer::TreeFragmentTokenizer(const StringPiece &s)
  : str_(s),
    value_(TreeFragmentToken_EOS, "", -1),  // placeholder, replaced below
    iter_(s.begin()),
    end_(s.end()),
    pos_(0)
{
  // Advance once to load the first token into value_.
  operator++();
}
|
|
|
// Pre-increment: reads the next token from the input into value_ and returns
// a reference to this tokenizer.
//
// Spaces and tabs separate tokens and are skipped.  '[' and ']' each form a
// single-character token (LSB and RSB respectively).  Any other character
// starts a WORD token that extends up to, but not including, the next space,
// tab, bracket, or the end of the input.  When no characters remain, an EOS
// token is stored.  Every token records the offset at which it begins.
TreeFragmentTokenizer &TreeFragmentTokenizer::operator++()
{
  // Consume any blanks in front of the next token.
  for (; iter_ != end_; ++iter_, ++pos_) {
    if (*iter_ != ' ' && *iter_ != '\t') {
      break;
    }
  }

  // Input exhausted: report end-of-string at the current offset.
  if (iter_ == end_) {
    value_ = TreeFragmentToken(TreeFragmentToken_EOS, "", pos_);
    return *this;
  }

  switch (*iter_) {
  case '[':
    value_ = TreeFragmentToken(TreeFragmentToken_LSB, "[", pos_);
    ++iter_;
    ++pos_;
    break;
  case ']':
    value_ = TreeFragmentToken(TreeFragmentToken_RSB, "]", pos_);
    ++iter_;
    ++pos_;
    break;
  default: {
    const std::size_t wordBegin = pos_;
    // The current character always belongs to the word, so step past it
    // before testing for a delimiter.
    do {
      ++iter_;
      ++pos_;
    } while (iter_ != end_ && *iter_ != ' ' && *iter_ != '\t' &&
             *iter_ != '[' && *iter_ != ']');
    value_ = TreeFragmentToken(TreeFragmentToken_WORD,
                               str_.substr(wordBegin, pos_ - wordBegin),
                               wordBegin);
    break;
  }
  }

  return *this;
}
|
|
|
// Post-increment: advances this tokenizer to the next token and returns a
// copy of the tokenizer as it was before advancing.
TreeFragmentTokenizer TreeFragmentTokenizer::operator++(int)
{
  TreeFragmentTokenizer previous = *this;
  operator++();
  return previous;
}
|
|
|
bool operator==(const TreeFragmentTokenizer &lhs, |
|
const TreeFragmentTokenizer &rhs) |
|
{ |
|
if (lhs.value_.type == TreeFragmentToken_EOS || |
|
rhs.value_.type == TreeFragmentToken_EOS) { |
|
return lhs.value_.type == TreeFragmentToken_EOS && |
|
rhs.value_.type == TreeFragmentToken_EOS; |
|
} |
|
return lhs.iter_ == rhs.iter_; |
|
} |
|
|
|
bool operator!=(const TreeFragmentTokenizer &lhs, |
|
const TreeFragmentTokenizer &rhs) |
|
{ |
|
return !(lhs == rhs); |
|
} |
|
|
|
} |
|
} |
|
} |
|
|