import pprint

def sort_by_token(tokenizer):
    # Print the vocabulary sorted by the length of each token string.
    vocab = tokenizer.get_vocab()
    sorted_vocab = dict(sorted(vocab.items(), key=lambda item: len(item[0])))
    pprint.pprint(sorted_vocab, sort_dicts=False)

def sort_by_id(tokenizer):
    # Print the vocabulary sorted by token id.
    vocab = tokenizer.get_vocab()
    sorted_vocab = dict(sorted(vocab.items(), key=lambda item: item[1]))
    pprint.pprint(sorted_vocab, sort_dicts=False)
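
# A minimal usage sketch (assumption: the tokenizer comes from Hugging Face
# transformers, and the "gpt2" model name is only an illustrative choice,
# not part of the original file).
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    sort_by_id(tokenizer)     # vocabulary ordered by token id
    sort_by_token(tokenizer)  # vocabulary ordered by token length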