bayartsogt's picture
add tokenizer
a644a82
raw
history blame contribute delete
442 Bytes
{
"[PAD]": 0,
"[UNK]": 1,
"|": 2,
"а": 3,
"б": 4,
"в": 5,
"г": 6,
"д": 7,
"е": 8,
"ж": 10,
"з": 11,
"и": 12,
"й": 13,
"к": 14,
"л": 15,
"м": 16,
"н": 17,
"о": 18,
"п": 20,
"р": 21,
"с": 22,
"т": 23,
"у": 24,
"ф": 26,
"х": 27,
"ц": 28,
"ч": 29,
"ш": 30,
"ъ": 31,
"ы": 32,
"ь": 33,
"э": 34,
"ю": 35,
"я": 36,
"ё": 9,
"ү": 25,
"ө": 19
}