phongdtd committed on
Commit
d1ecb39
·
1 Parent(s): adbe339

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ư": 0, "ý": 1, "": 2, "": 3, "ê": 4, "ó": 5, "": 6, "í": 7, "": 8, "c": 9, "t": 10, "": 11, "n": 12, "": 13, "é": 14, "u": 15, "ă": 16, "h": 17, "e": 18, "": 19, "": 20, "ù": 21, "a": 22, "è": 23, "ị": 24, "": 25, "": 26, "": 27, "y": 28, "": 29, "l": 30, "": 31, "ũ": 32, "i": 33, "x": 34, "": 35, "á": 36, "": 37, "r": 38, "": 39, "": 40, "b": 41, "": 42, "": 43, "": 44, "ĩ": 46, "": 47, "": 48, "": 49, "s": 50, "": 51, "g": 52, "ơ": 53, "": 54, "ú": 55, "": 56, "v": 57, "f": 58, "": 59, "o": 60, "ì": 61, "ế": 62, "": 63, "đ": 64, "": 65, "": 66, "": 67, "": 68, "": 69, "ò": 70, "ã": 71, "": 72, "õ": 73, "": 74, "z": 75, "": 76, "w": 77, "": 78, "": 79, "": 80, "k": 81, "": 82, "â": 83, "m": 84, "j": 85, "d": 86, "à": 87, "": 88, "ô": 89, "q": 90, "p": 91, "": 92, "": 93, "|": 45, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"ó": 0, "": 1, "": 2, "": 3, "ù": 4, "": 5, "à": 6, "f": 7, "": 8, "": 9, "g": 10, "": 11, "": 12, "": 13, "": 14, "": 15, "": 16, "b": 17, "ì": 18, "": 19, "ô": 20, "": 21, "": 22, "ĩ": 24, "": 25, "m": 26, "è": 27, "": 28, "": 29, "": 30, "": 31, "": 32, "y": 33, "í": 34, "e": 35, "c": 36, "": 37, "": 38, "d": 39, "ú": 40, "s": 41, "ơ": 42, "": 43, "z": 44, "p": 45, "w": 46, "đ": 47, "n": 48, "r": 49, "o": 50, "": 51, "l": 52, "ê": 53, "õ": 54, "": 55, "": 56, "": 57, "": 58, "": 59, "t": 60, "": 61, "a": 62, "ũ": 63, "": 64, "ò": 65, "": 66, "h": 67, "": 68, "": 69, "i": 70, "ă": 71, "": 72, "": 73, "â": 74, "á": 75, "j": 76, "ư": 77, "": 78, "": 79, "": 80, "": 81, "x": 82, "ý": 83, "": 84, "é": 85, "ế": 86, "u": 87, "v": 88, "ã": 89, "q": 90, "": 91, "k": 92, "": 93, "|": 23, "[UNK]": 94, "[PAD]": 95}