File size: 400 Bytes
d7c0260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import pprint

def sort_by_token(tokenizer):
    """Pretty-print the tokenizer's vocabulary ordered by token length.

    The mapping returned by ``tokenizer.get_vocab()`` is reordered so that
    entries with the shortest keys come first; entries whose keys have the
    same length keep the relative order they had in the original mapping
    (the sort is stable).

    Args:
        tokenizer: Any object exposing ``get_vocab()`` that returns a mapping
            whose keys support ``len()`` (presumably token strings mapped to
            integer ids -- confirm against the tokenizer in use).

    Returns:
        None. The reordered vocabulary is written to standard output.
    """
    entries = list(tokenizer.get_vocab().items())

    # Order by the length of the token (the key), not by the token text.
    entries.sort(key=lambda pair: len(pair[0]))

    # sort_dicts=False keeps pprint from re-sorting the keys alphabetically.
    pprint.pprint(dict(entries), sort_dicts=False)

def sort_by_id(tokenizer):
    """Pretty-print the tokenizer's vocabulary ordered by mapped value.

    The mapping returned by ``tokenizer.get_vocab()`` is reordered in
    ascending order of its values; entries with equal values keep the
    relative order they had in the original mapping (the sort is stable).

    Args:
        tokenizer: Any object exposing ``get_vocab()`` that returns a mapping
            with mutually comparable values (presumably token strings mapped
            to integer ids -- confirm against the tokenizer in use).

    Returns:
        None. The reordered vocabulary is written to standard output.
    """
    vocab = tokenizer.get_vocab()

    # Rank the tokens by the value each one maps to.
    tokens_in_id_order = sorted(vocab, key=lambda token: vocab[token])
    by_id = {token: vocab[token] for token in tokens_in_id_order}

    # sort_dicts=False keeps pprint from re-sorting the keys alphabetically.
    pprint.pprint(by_id, sort_dicts=False)