File size: 1,628 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
#pragma once
#include "util/string_piece.hh"
namespace Moses
{
namespace Syntax
{
namespace F2S
{
// Kinds of token that can appear when a tree fragment string is tokenized.
// The numeric values are spelled out; they are the same values the
// enumerators receive from implicit numbering.
enum TreeFragmentTokenType {
  TreeFragmentToken_EOS = 0,  // end of the string; the token value is undefined
  TreeFragmentToken_LSB = 1,  // the bracket "["
  TreeFragmentToken_RSB = 2,  // the bracket "]"
  TreeFragmentToken_WORD = 3  // a symbol such as "NP" or "a"
};
struct TreeFragmentToken {
public:
TreeFragmentToken(TreeFragmentTokenType, StringPiece, std::size_t);
TreeFragmentTokenType type;
StringPiece value;
std::size_t pos;
};
// Tokenizer for tree fragment strings written in Moses format.
//
// The tokenizer is used like an iterator: dereference it (operator* or
// operator->) to read the current token and increment it to move on to the
// next one.
//
// Example: the string "[NP [NP [NN a]] [NP]]" produces the token sequence
//
//    1  LSB   "["
//    2  WORD  "NP"
//    3  LSB   "["
//    4  WORD  "NP"
//    5  LSB   "["
//    6  WORD  "NN"
//    7  WORD  "a"
//    8  RSB   "]"
//    9  RSB   "]"
//   10  LSB   "["
//   11  WORD  "NP"
//   12  RSB   "]"
//   13  RSB   "]"
//   14  EOS   undefined
//
class TreeFragmentTokenizer
{
public:
  // NOTE(review): presumably constructs the end-of-sequence tokenizer to
  // compare against -- confirm against the implementation.
  TreeFragmentTokenizer();

  // Constructs a tokenizer over the given string.
  // NOTE(review): StringPiece is presumably a non-owning view, in which case
  // the underlying characters must outlive the tokenizer -- confirm.
  TreeFragmentTokenizer(const StringPiece &str);

  // Pre-increment and post-increment; both are defined outside this block.
  TreeFragmentTokenizer &operator++();
  TreeFragmentTokenizer operator++(int);

  // Read-only access to the current token.
  const TreeFragmentToken &operator*() const {
    return value_;
  }

  // Member access on the current token.
  const TreeFragmentToken *operator->() const {
    return &value_;
  }

  // Comparison operators; declared as friends so that they can read the
  // private state. Their definitions are outside this block.
  friend bool operator==(const TreeFragmentTokenizer &lhs,
                         const TreeFragmentTokenizer &rhs);
  friend bool operator!=(const TreeFragmentTokenizer &lhs,
                         const TreeFragmentTokenizer &rhs);

private:
  // Member order is significant (initialisation order and layout) and is
  // kept as it was.
  StringPiece str_;                   // the string handed to the tokenizer
  TreeFragmentToken value_;           // token returned by operator* / operator->
  StringPiece::const_iterator iter_;  // scan position in the string
  StringPiece::const_iterator end_;   // end-of-string position
  std::size_t pos_;                   // NOTE(review): presumably a numeric offset matching iter_ -- confirm
};
} // namespace F2S
} // namespace Syntax
} // namespace Moses
|