kingabzpro
committed on
Commit
•
3af2f97
1
Parent(s):
888b8a3
add tokenizer
Browse files
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"¤": 1, "ਂ": 2, "ਅ": 3, "ਆ": 4, "ਇ": 5, "ਈ": 6, "ਉ": 7, "ਊ": 8, "ਏ": 9, "ਐ": 10, "ਓ": 11, "ਔ": 12, "ਕ": 13, "ਖ": 14, "ਗ": 15, "ਘ": 16, "ਚ": 17, "ਛ": 18, "ਜ": 19, "ਝ": 20, "ਟ": 21, "ਠ": 22, "ਡ": 23, "ਢ": 24, "ਣ": 25, "ਤ": 26, "ਥ": 27, "ਦ": 28, "ਧ": 29, "ਨ": 30, "ਪ": 31, "ਫ": 32, "ਬ": 33, "ਭ": 34, "ਮ": 35, "ਯ": 36, "ਰ": 37, "ਲ": 38, "ਲ਼": 39, "ਵ": 40, "ਸ਼": 41, "ਸ": 42, "ਹ": 43, "਼": 44, "ਾ": 45, "ਿ": 46, "ੀ": 47, "ੁ": 48, "ੂ": 49, "ੇ": 50, "ੈ": 51, "ੋ": 52, "ੌ": 53, "੍": 54, "ਖ਼": 55, "ਗ਼": 56, "ਜ਼": 57, "ੜ": 58, "ਫ਼": 59, "ੰ": 60, "ੱ": 61, "–": 62, "’": 63, "…": 64, "|": 0, "<unk>": 65, "<pad>": 66, "<s>": 67, "</s>": 68}
|