File size: 422 Bytes
f8bd4d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import json


class Tokenizer:
    """Two-way lookup between words and integer ids backed by JSON files.

    The two mappings are loaded independently from separate files; nothing
    here checks that they are inverses of each other.
    """

    def __init__(self, word2int_path, int2word_path):
        """Load both vocabulary mappings from disk.

        Args:
            word2int_path: Path to a JSON object mapping each word to its id.
            int2word_path: Path to a JSON object mapping each id to its word.
                JSON object keys are always strings, so the keys are
                converted back to ``int`` while loading.

        Raises:
            OSError: If either file cannot be opened.
            json.JSONDecodeError: If either file is not valid JSON.
            ValueError: If a key in the int-to-word file is not an integer
                literal.
        """
        # JSON text is UTF-8 by specification; being explicit keeps
        # non-ASCII vocabularies loading correctly on platforms whose
        # default locale encoding is something else (e.g. cp1252).
        with open(word2int_path, 'r', encoding='utf-8') as f:
            self.word2int = json.load(f)
        with open(int2word_path, 'r', encoding='utf-8') as f:
            self.int2word = {int(k): v for k, v in json.load(f).items()}

    def encode(self, word):
        """Return the id stored for ``word``.

        Raises:
            KeyError: If ``word`` is not in the vocabulary.
        """
        return self.word2int[word]

    def decode(self, int_val):
        """Return the word stored for the integer id ``int_val``.

        Raises:
            KeyError: If ``int_val`` is not in the vocabulary.
        """
        return self.int2word[int_val]