add tokenizer
Browse files- added_tokens.json +1 -1
- vocab.json +1 -1
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<s>":
|
|
|
1 |
+
{"<s>": 95, "</s>": 96}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ऐ": 0, "e": 1, "ऊ": 2, "इ": 3, "ल": 4, "उ": 5, "ॉ": 6, "य": 7, "ः": 8, "ू": 9, "v": 10, "'": 11, "म": 12, "ऋ": 13, "ा": 14, "o": 15, "ँ": 16, "ज": 17, "औ": 18, "़": 20, "क़": 21, "ग": 22, "h": 23, "t": 24, "भ": 25, "ग़": 26, "घ": 27, "p": 28, "ध": 29, "र": 30, "w": 31, "ै": 32, "च": 33, "r": 34, "n": 35, "े": 36, "ठ": 37, "ढ": 38, "g": 39, "अ": 40, "ी": 41, "ड़": 42, "ओ": 43, "d": 44, "ण": 45, "क": 46, "आ": 47, "f": 48, "ख": 49, "ॅ": 50, "छ": 51, "s": 52, "j": 53, "i": 54, "त": 55, "x": 56, "ड": 57, "ट": 58, "ह": 59, "प": 60, "ृ": 61, "ष": 62, "फ": 63, "ि": 64, "k": 65, "m": 66, "ढ़": 67, "ो": 68, "झ": 69, "l": 70, "b": 71, "ऑ": 72, "u": 73, "c": 74, "थ": 75, "ज़": 76, "न": 77, "स": 78, "द": 79, "्": 80, "ञ": 81, "y": 82, "z": 83, "श": 84, "ए": 85, "ब": 86, "ई": 87, "ु": 88, "a": 89, "ौ": 90, "व": 91, "ं": 92, "|": 19, "[UNK]": 93, "[PAD]": 94}
|