File size: 2,893 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
#include "tree_fragment_tokenizer.h"
#define BOOST_TEST_MODULE TreeTest
#include <boost/test/unit_test.hpp>
#include <boost/scoped_ptr.hpp>
namespace MosesTraining {
namespace Syntax {
namespace {
// Tokenizing an empty string must produce no tokens.
BOOST_AUTO_TEST_CASE(tokenize_empty) {
  const std::string text = "";
  std::vector<TreeFragmentToken> toks;
  // Drain the tokenizer: iteration ends once it no longer differs from a
  // default-constructed TreeFragmentTokenizer.
  TreeFragmentTokenizer it(text);
  while (it != TreeFragmentTokenizer()) {
    toks.push_back(*it);
    ++it;
  }
  BOOST_REQUIRE(toks.empty());
}
// Checks tokenization of a malformed, space-separated input: every square
// bracket is expected as its own token (even when glued to word characters,
// as in "wea[sel"), and the remaining runs of characters as WORD tokens.
BOOST_AUTO_TEST_CASE(tokenize_space) {
  const std::string text = " [ weasel weasel ] [] ] wea[sel";
  std::vector<TreeFragmentToken> toks;
  // Drain the tokenizer: iteration ends once it no longer differs from a
  // default-constructed TreeFragmentTokenizer.
  TreeFragmentTokenizer it(text);
  while (it != TreeFragmentTokenizer()) {
    toks.push_back(*it);
    ++it;
  }

  BOOST_REQUIRE(toks.size() == 10);

  // "[ weasel weasel ]"
  BOOST_REQUIRE(toks[0].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[0].value == "[");

  BOOST_REQUIRE(toks[1].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[1].value == "weasel");

  BOOST_REQUIRE(toks[2].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[2].value == "weasel");

  BOOST_REQUIRE(toks[3].type == TreeFragmentToken_RSB);
  BOOST_REQUIRE(toks[3].value == "]");

  // "[]" -- adjacent brackets are still two separate tokens.
  BOOST_REQUIRE(toks[4].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[4].value == "[");

  BOOST_REQUIRE(toks[5].type == TreeFragmentToken_RSB);
  BOOST_REQUIRE(toks[5].value == "]");

  // Stray closing bracket.
  BOOST_REQUIRE(toks[6].type == TreeFragmentToken_RSB);
  BOOST_REQUIRE(toks[6].value == "]");

  // "wea[sel" -- a bracket inside a word splits it into three tokens.
  BOOST_REQUIRE(toks[7].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[7].value == "wea");

  BOOST_REQUIRE(toks[8].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[8].value == "[");

  BOOST_REQUIRE(toks[9].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[9].value == "sel");
}
// Checks the token-type sequence produced for a well-formed bracketed tree
// fragment. Only the types are verified here, not the token values.
BOOST_AUTO_TEST_CASE(tokenize_fragment) {
  const std::string text = "[S [NP [NN weasels]] [VP]]";
  std::vector<TreeFragmentToken> toks;
  // Drain the tokenizer: iteration ends once it no longer differs from a
  // default-constructed TreeFragmentTokenizer.
  TreeFragmentTokenizer it(text);
  while (it != TreeFragmentTokenizer()) {
    toks.push_back(*it);
    ++it;
  }

  BOOST_REQUIRE(toks.size() == 13);

  // "[S"
  BOOST_REQUIRE(toks[0].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[1].type == TreeFragmentToken_WORD);
  // "[NP"
  BOOST_REQUIRE(toks[2].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[3].type == TreeFragmentToken_WORD);
  // "[NN weasels]]"
  BOOST_REQUIRE(toks[4].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[5].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[6].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[7].type == TreeFragmentToken_RSB);
  BOOST_REQUIRE(toks[8].type == TreeFragmentToken_RSB);
  // "[VP]]"
  BOOST_REQUIRE(toks[9].type == TreeFragmentToken_LSB);
  BOOST_REQUIRE(toks[10].type == TreeFragmentToken_WORD);
  BOOST_REQUIRE(toks[11].type == TreeFragmentToken_RSB);
  BOOST_REQUIRE(toks[12].type == TreeFragmentToken_RSB);
}
} // namespace
} // namespace Syntax
} // namespace MosesTraining
|