add tokenizer
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
{"<s>": 51, "</s>": 52}
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json
ADDED
@@ -0,0 +1 @@
{"շ": 0, "տ": 1, "բ": 2, "։": 3, "՚": 4, "ը": 5, "հ": 6, "զ": 7, "ք": 8, "՞": 9, "ա": 10, "ե": 11, "մ": 12, "ջ": 13, "ծ": 14, "չ": 15, "ժ": 16, "՛": 17, "և": 18, "յ": 19, "օ": 20, "»": 21, "ո": 22, "«": 24, "(": 25, "ղ": 26, "ց": 27, "խ": 28, "լ": 29, "կ": 30, "ձ": 31, "՝": 32, "ի": 33, "վ": 34, "դ": 35, "գ": 36, "ճ": 37, "պ": 38, "ր": 39, "թ": 40, "փ": 41, "ռ": 42, "է": 43, "ւ": 44, ")": 45, "ն": 46, "ս": 47, "ֆ": 48, "|": 23, "[UNK]": 49, "[PAD]": 50}