emre committed on
Commit
3dc65dd
·
1 Parent(s): 6cea5ec

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 51, "</s>": 52}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"շ": 0, "տ": 1, "բ": 2, "։": 3, "՚": 4, "ը": 5, "հ": 6, "զ": 7, "ք": 8, "՞": 9, "ա": 10, "ե": 11, "մ": 12, "ջ": 13, "ծ": 14, "չ": 15, "ժ": 16, "՛": 17, "և": 18, "յ": 19, "օ": 20, "»": 21, "ո": 22, "«": 24, "(": 25, "ղ": 26, "ց": 27, "խ": 28, "լ": 29, "կ": 30, "ձ": 31, "՝": 32, "ի": 33, "վ": 34, "դ": 35, "գ": 36, "ճ": 37, "պ": 38, "ր": 39, "թ": 40, "փ": 41, "ռ": 42, "է": 43, "ւ": 44, ")": 45, "ն": 46, "ս": 47, "ֆ": 48, "|": 23, "[UNK]": 49, "[PAD]": 50}