riri-en-np / vocab.json
roviso's picture
Upload tokenizer
d57bbae
raw
history blame
2.57 kB
{
"npi": {
"\n": 0,
"!": 2,
"\"": 3,
"#": 4,
"$": 5,
"%": 6,
"&": 7,
"'": 8,
"(": 9,
")": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"=": 28,
"?": 29,
"A": 30,
"B": 31,
"C": 32,
"D": 33,
"E": 34,
"F": 35,
"G": 36,
"H": 37,
"I": 38,
"J": 39,
"K": 40,
"L": 41,
"M": 42,
"N": 43,
"O": 44,
"P": 45,
"Q": 46,
"R": 47,
"S": 48,
"T": 49,
"U": 50,
"V": 51,
"W": 52,
"Y": 53,
"Z": 54,
"[": 55,
"[PAD]": 176,
"[UNK]": 175,
"]": 56,
"_": 57,
"`": 58,
"a": 59,
"b": 60,
"c": 61,
"d": 62,
"e": 63,
"f": 64,
"g": 65,
"h": 66,
"i": 67,
"j": 68,
"k": 69,
"l": 70,
"m": 71,
"n": 72,
"o": 73,
"p": 74,
"q": 75,
"r": 76,
"s": 77,
"t": 78,
"u": 79,
"v": 80,
"w": 81,
"x": 82,
"y": 83,
"z": 84,
"|": 1,
"¥": 86,
"«": 87,
"®": 88,
"·": 89,
"Ï": 90,
"è": 91,
"é": 92,
"÷": 93,
"ँ": 94,
"ं": 95,
"ः": 96,
"अ": 97,
"आ": 98,
"इ": 99,
"ई": 100,
"उ": 101,
"ऊ": 102,
"ऋ": 103,
"ए": 104,
"ऐ": 105,
"ओ": 106,
"औ": 107,
"क": 108,
"ख": 109,
"ग": 110,
"घ": 111,
"ङ": 112,
"च": 113,
"छ": 114,
"ज": 115,
"झ": 116,
"ञ": 117,
"ट": 118,
"ठ": 119,
"ड": 120,
"ढ": 121,
"ण": 122,
"त": 123,
"थ": 124,
"द": 125,
"ध": 126,
"न": 127,
"प": 128,
"फ": 129,
"ब": 130,
"भ": 131,
"म": 132,
"य": 133,
"र": 134,
"ल": 135,
"व": 136,
"श": 137,
"ष": 138,
"स": 139,
"ह": 140,
"़": 141,
"ा": 142,
"ि": 143,
"ी": 144,
"ु": 145,
"ू": 146,
"ृ": 147,
"े": 148,
"ै": 149,
"ो": 150,
"ौ": 151,
"्": 152,
"ॐ": 153,
"।": 154,
"०": 155,
"१": 156,
"२": 157,
"३": 158,
"४": 159,
"५": 160,
"६": 161,
"७": 162,
"८": 163,
"९": 164,
"​": 165,
"‌": 166,
"‍": 167,
"–": 168,
"—": 169,
"‘": 170,
"’": 171,
"“": 172,
"”": 173,
"…": 174,
" ": 175
}
}