Spaces:
Running
Running
File size: 783 Bytes
9e582c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
class Language:
    """Character-level vocabulary that maps characters to integer indices.

    Three symbols are reserved when the vocabulary is created:
    '#' (index 0) for padding, '$' (index 1) for unknown characters and
    '^' (index 2) for start of sequence. Every other character receives
    the next free index the first time it is added.
    """

    def __init__(self, name):
        """Create a vocabulary called *name* holding only the reserved symbols."""
        self.name = name
        # '^': start of sequence, '$': unknown char, '#': padding
        self.char2index = {'#': 0, '$': 1, '^': 2}
        self.index2char = {0: '#', 1: '$', 2: '^'}
        self.vocab_size = 3  # Number of symbols, reserved ones included

    def addWord(self, word):
        """Register every character of *word* in the vocabulary."""
        for char in word:
            self.addChar(char)

    def addChar(self, char):
        """Assign the next free index to *char* unless it is already known."""
        if char not in self.char2index:
            self.char2index[char] = self.vocab_size
            self.index2char[self.vocab_size] = char
            self.vocab_size += 1

    def encode(self, s):
        """Return the list of indices for the characters of *s*.

        Characters that were never added are mapped to the index of the
        reserved unknown symbol '$' instead of raising ``KeyError``.
        """
        unknown_index = self.char2index['$']
        return [self.char2index.get(ch, unknown_index) for ch in s]

    def decode(self, l):
        """Return the string spelled by the indices in *l*.

        Raises:
            KeyError: if an index is not part of the vocabulary.
        """
        return ''.join(self.index2char[i] for i in l)

    def vocab(self):
        """Return a view of all known characters, reserved symbols included."""
        return self.char2index.keys()
|