mgtoxd committed on
Commit
1074431
·
1 Parent(s): 11770f6

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -6
  2. tokenizer_config.json +1 -10
  3. vocab.json +1 -37
special_tokens_map.json CHANGED
@@ -1,6 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
- }
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,10 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "do_lower_case": false,
4
- "eos_token": "</s>",
5
- "pad_token": "[PAD]",
6
- "replace_word_delimiter_char": " ",
7
- "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
- "unk_token": "[UNK]",
9
- "word_delimiter_token": "|"
10
- }
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
 
 
 
 
 
 
 
 
vocab.json CHANGED
@@ -1,37 +1 @@
1
- {
2
- "'": 11,
3
- "2": 7,
4
- "3": 31,
5
- "4": 14,
6
- "5": 10,
7
- "6": 27,
8
- "[PAD]": 34,
9
- "[UNK]": 33,
10
- "a": 24,
11
- "b": 13,
12
- "c": 16,
13
- "d": 26,
14
- "e": 12,
15
- "f": 1,
16
- "g": 18,
17
- "h": 19,
18
- "i": 2,
19
- "j": 5,
20
- "k": 6,
21
- "l": 3,
22
- "m": 29,
23
- "n": 30,
24
- "o": 0,
25
- "p": 22,
26
- "q": 21,
27
- "r": 8,
28
- "s": 23,
29
- "t": 4,
30
- "u": 17,
31
- "v": 32,
32
- "w": 15,
33
- "x": 25,
34
- "y": 20,
35
- "z": 28,
36
- "|": 9
37
- }
 
1
+ {"d": 0, "2": 2, "4": 3, "g": 4, "e": 5, "l": 6, "s": 7, "j": 8, "6": 9, "3": 10, "r": 11, "v": 12, "o": 13, "w": 14, "c": 15, "n": 16, "y": 17, "b": 18, "k": 19, "f": 20, "t": 21, "x": 22, "m": 23, "z": 24, "u": 25, "'": 26, "a": 27, "5": 28, "i": 29, "p": 30, "q": 31, "h": 32, "|": 1, "[UNK]": 33, "[PAD]": 34}