kingabzpro
committed on
Commit
•
31d25b6
1
Parent(s):
5aa67cc
add tokenizer
Browse files
- special_tokens_map.json +1 -1
- tokenizer_config.json +1 -1
- vocab.json +1 -1
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "
|
|
|
1 |
+
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "/workspace/.cache/huggingface/transformers/93e627c1d485fcab8a79fcf898b021187013b29075034ece7e0e46dfa29292ec.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd", "tokenizer_file": null, "name_or_path": "Harveenchadha/vakyansh-wav2vec2-punjabi-pam-10", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "|": 4, "ਂ": 5, "ਅ": 6, "ਆ": 7, "ਇ": 8, "ਈ": 9, "ਉ": 10, "ਊ": 11, "ਏ": 12, "ਐ": 13, "ਓ": 14, "ਔ": 15, "ਕ": 16, "ਖ": 17, "ਗ": 18, "ਘ": 19, "ਚ": 20, "ਛ": 21, "ਜ": 22, "ਝ": 23, "ਟ": 24, "ਠ": 25, "ਡ": 26, "ਢ": 27, "ਣ": 28, "ਤ": 29, "ਥ": 30, "ਦ": 31, "ਧ": 32, "ਨ": 33, "ਪ": 34, "ਫ": 35, "ਬ": 36, "ਭ": 37, "ਮ": 38, "ਯ": 39, "ਰ": 40, "ਲ": 41, "ਲ਼": 42, "ਵ": 43, "ਸ਼": 44, "ਸ": 45, "ਹ": 46, "਼": 47, "ਾ": 48, "ਿ": 49, "ੀ": 50, "ੁ": 51, "ੂ": 52, "ੇ": 53, "ੈ": 54, "ੋ": 55, "ੌ": 56, "੍": 57, "ਖ਼": 58, "ਗ਼": 59, "ਜ਼": 60, "ੜ": 61, "ਫ਼": 62, "ੰ": 63, "ੱ": 64}
|