Spaces:
Sleeping
Sleeping
File size: 411 Bytes
b57fe5a |
1 2 3 4 5 6 7 8 9 10 11 12 |
class CharacterTokenizer:
    """Character-level tokenizer that maps each distinct character to an integer id.

    The vocabulary is the set of unique characters in the text passed to the
    constructor. Ids are assigned by position in the sorted character list,
    so the same input text always produces the same mapping.

    Attributes are plain dictionaries and may be read directly:
        vocab_size: number of distinct characters in the vocabulary.
        stoi: character -> integer id.
        itos: integer id -> character.
    """

    def __init__(self, text: str):
        """Build the vocabulary from every distinct character in ``text``."""
        # sorted() accepts any iterable, so the set needs no intermediate list.
        chars = sorted(set(text))
        self.vocab_size = len(chars)
        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = {i: ch for i, ch in enumerate(chars)}

    def encode(self, s: str) -> list[int]:
        """Return the list of integer ids for the characters of ``s``.

        Raises:
            KeyError: if ``s`` contains a character that was not present in
                the text the tokenizer was built from.
        """
        return [self.stoi[c] for c in s]

    def decode(self, l: list[int]) -> str:
        """Return the string formed by the characters for the ids in ``l``.

        Raises:
            KeyError: if ``l`` contains an id outside the vocabulary.
        """
        # The parameter name ``l`` is kept as-is: it is part of the public
        # signature and callers may pass it by keyword.
        return "".join([self.itos[i] for i in l])