add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ó": 0, "ạ": 1, "ừ": 2, "ẹ": 3, "ù": 4, "ợ": 5, "à": 6, "f": 7, "ẻ": 8, "ộ": 9, "g": 10, "ẽ": 11, "ố": 12, "ỷ": 13, "ỉ": 14, "ồ": 15, "ủ": 16, "b": 17, "ì": 18, "ỏ": 19, "ô": 20, "ữ": 21, "ờ": 22, "ĩ": 24, "ề": 25, "m": 26, "è": 27, "ễ": 28, "ấ": 29, "ở": 30, "ẵ": 31, "ử": 32, "y": 33, "í": 34, "e": 35, "c": 36, "ầ": 37, "ắ": 38, "d": 39, "ú": 40, "s": 41, "ơ": 42, "ỳ": 43, "z": 44, "p": 45, "w": 46, "đ": 47, "n": 48, "r": 49, "o": 50, "ọ": 51, "l": 52, "ê": 53, "õ": 54, "ẫ": 55, "ụ": 56, "ặ": 57, "ỹ": 58, "ứ": 59, "t": 60, "ể": 61, "a": 62, "ũ": 63, "ẳ": 64, "ò": 65, "ả": 66, "h": 67, "ỗ": 68, "ỡ": 69, "i": 70, "ă": 71, "ớ": 72, "ự": 73, "â": 74, "á": 75, "j": 76, "ư": 77, "ỵ": 78, "ằ": 79, "ệ": 80, "ẩ": 81, "x": 82, "ý": 83, "ậ": 84, "é": 85, "ế": 86, "u": 87, "v": 88, "ã": 89, "q": 90, "ị": 91, "k": 92, "ổ": 93, "|": 23, "[UNK]": 94, "[PAD]": 95}
|