Spaces:
Runtime error
Runtime error
File size: 4,174 Bytes
74fc30d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unittest
import mock
import os,sys,inspect
# Absolute path of the directory that contains this test file.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# One level up from this file's directory; it is put at the front of sys.path.
# NOTE(review): presumably so the `apply_bpe` import below resolves to the
# module sitting in the parent directory -- confirm against the repo layout.
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
from apply_bpe import isolate_glossary, BPE
class TestIsolateGlossaryFunction(unittest.TestCase):
    """Exercise ``isolate_glossary`` with the single glossary term ``'like'``.

    Each case passes one word to ``isolate_glossary`` and compares the
    returned list of pieces with the expected split.
    """

    def setUp(self):
        # Glossary term shared by every case in this class.
        self.glossary = 'like'

    def _run_test_case(self, test_case):
        """Assert that ``isolate_glossary`` maps ``test_case[0]`` to ``test_case[1]``."""
        word, wanted = test_case
        self.assertEqual(isolate_glossary(word, self.glossary), wanted)

    def test_empty_string(self):
        # An empty word is expected to come back as a single empty piece.
        self._run_test_case(('', ['']))

    def test_no_glossary(self):
        # A word without the glossary term is expected to stay whole.
        self._run_test_case(('word', ['word']))

    def test_isolated_glossary(self):
        # The glossary term on its own is expected to stay whole.
        self._run_test_case(('like', ['like']))

    def test_word_one_side(self):
        # Glossary term followed directly by other text.
        self._run_test_case(('likeword', ['like', 'word']))

    def test_words_both_sides(self):
        # Glossary term surrounded by other text on both sides.
        self._run_test_case(('wordlikeword', ['word', 'like', 'word']))

    def test_back_to_back_glossary(self):
        # Two adjacent occurrences are expected as two separate pieces.
        self._run_test_case(('likelike', ['like', 'like']))

    def test_multiple_glossaries(self):
        # Several occurrences interleaved with other text.
        self._run_test_case(
            ('wordlikewordlike', ['word', 'like', 'word', 'like'])
        )
class TestBPEIsolateGlossariesMethod(unittest.TestCase):
    """Check ``BPE._isolate_glossaries`` with several literal glossary terms."""

    def setUp(self):
        # A MagicMock is handed to BPE as its first argument, with readline()
        # stubbed to return 'something'.
        # NOTE(review): presumably this stands in for the codes file object --
        # confirm against BPE.__init__.
        codes_stub = mock.MagicMock()
        codes_stub.readline.return_value = 'something'
        self.bpe = BPE(codes_stub, glossaries=['like', 'Manuel', 'USA'])

    def _run_test_case(self, test_case):
        """Assert that ``_isolate_glossaries`` maps ``test_case[0]`` to ``test_case[1]``."""
        word, wanted = test_case
        self.assertEqual(self.bpe._isolate_glossaries(word), wanted)

    def test_multiple_glossaries(self):
        # All three glossary terms appear, 'Manuel' twice in a row.
        self._run_test_case((
            'wordlikeUSAwordManuelManuelwordUSA',
            ['word', 'like', 'USA', 'word', 'Manuel', 'Manuel', 'word', 'USA'],
        ))
class TestRegexIsolateGlossaries(unittest.TestCase):
    """Check ``BPE._isolate_glossaries`` when glossary entries are regex patterns."""

    def setUp(self):
        # A MagicMock is handed to BPE as its first argument, with readline()
        # stubbed to return 'something'.
        # NOTE(review): presumably this stands in for the codes file object --
        # confirm against BPE.__init__.
        amock = mock.MagicMock()
        amock.readline.return_value = 'something'
        # Raw strings: "\w" and "\d" are not valid Python string escapes, so
        # non-raw literals trigger an invalid-escape warning on newer
        # interpreters. The resulting pattern text is unchanged.
        glossaries = [r"<country>\w*</country>", r"<name>\w*</name>", r"\d+"]
        self.bpe = BPE(amock, glossaries=glossaries)

    def _run_test_case(self, test_case):
        """Assert that ``_isolate_glossaries`` maps ``test_case[0]`` to ``test_case[1]``."""
        orig, expected = test_case
        out = self.bpe._isolate_glossaries(orig)
        self.assertEqual(out, expected)

    def test_regex_glossaries(self):
        # 'like' is not a glossary entry in this class, so the leading
        # 'wordlike' is expected to stay in one piece; the tagged spans and
        # the digit run are expected to be isolated.
        orig = 'wordlike<country>USA</country>word10001word<name>Manuel</name>word<country>USA</country>'
        exp = ['wordlike', '<country>USA</country>', 'word', '10001', 'word', '<name>Manuel</name>', 'word', '<country>USA</country>']
        test_case = (orig, exp)
        self._run_test_case(test_case)
def encode_mock(segment, x2, x3, x4, x5, x6, x7, glosses, dropout):
    """Stand-in for ``apply_bpe.encode`` used as a ``mock.patch`` side effect.

    A segment matched by ``glosses`` is returned whole as a 1-tuple. Any
    other segment is cut at its midpoint (integer division) into a 2-tuple.
    ``x2`` through ``x7`` and ``dropout`` are accepted but not used.
    """
    if not glosses.match(segment):
        midpoint = len(segment) // 2
        return (segment[:midpoint], segment[midpoint:])
    return (segment,)
class TestBPESegmentMethod(unittest.TestCase):
    """Check ``BPE.segment`` while ``apply_bpe.encode`` is replaced by ``encode_mock``."""

    def setUp(self):
        # A MagicMock is handed to BPE as its first argument, with readline()
        # stubbed to return 'something'.
        # NOTE(review): presumably this stands in for the codes file object --
        # confirm against BPE.__init__.
        codes_stub = mock.MagicMock()
        codes_stub.readline.return_value = 'something'
        self.bpe = BPE(codes_stub, glossaries=['like', 'Manuel', 'USA'])

    @mock.patch('apply_bpe.encode', side_effect=encode_mock)
    def _run_test_case(self, test_case, encode_function):
        """Assert that ``segment`` maps ``test_case[0]`` to ``test_case[1]``.

        ``encode_function`` is the patch object injected by ``mock.patch``;
        it is not referenced directly.
        """
        sentence, wanted = test_case
        self.assertEqual(self.bpe.segment(sentence), wanted)

    def test_multiple_glossaries(self):
        # Two space-separated words; the expected output joins pieces with
        # the '@@ ' marker and keeps each glossary term as one piece.
        self._run_test_case((
            'wordlikeword likeManuelword',
            'wo@@ rd@@ like@@ wo@@ rd like@@ Manuel@@ wo@@ rd',
        ))
# When this file is executed directly, run the tests through unittest's
# command-line entry point.
if __name__ == "__main__":
    unittest.main()
|