Commit
·
5596203
1
Parent(s):
0d66979
Upload 4 files
Browse files
- alphabet.json +1 -0
- preprocessor_config.json +1 -0
- special_tokens_map.json +1 -6
- tokenizer_config.json +1 -10
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["m", "i", "e", "h", "p", "d", "o", "z", "l", "q", "r", "j", "a", "b", "k", "f", "u", "v", "c", "w", "g", "x", "'", " ", "y", "s", "n", "t", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
|
preprocessor_config.json
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
"feature_size": 1,
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
|
|
7 |
"return_attention_mask": false,
|
8 |
"sampling_rate": 16000
|
9 |
}
|
|
|
4 |
"feature_size": 1,
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
8 |
"return_attention_mask": false,
|
9 |
"sampling_rate": 16000
|
10 |
}
|
special_tokens_map.json
CHANGED
@@ -1,6 +1 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": "<s>",
|
3 |
-
"eos_token": "</s>",
|
4 |
-
"pad_token": "[PAD]",
|
5 |
-
"unk_token": "[UNK]"
|
6 |
-
}
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
|
|
|
|
|
|
|
|
|
tokenizer_config.json
CHANGED
@@ -1,10 +1 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": "<s>",
|
3 |
-
"do_lower_case": false,
|
4 |
-
"eos_token": "</s>",
|
5 |
-
"pad_token": "[PAD]",
|
6 |
-
"replace_word_delimiter_char": " ",
|
7 |
-
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
8 |
-
"unk_token": "[UNK]",
|
9 |
-
"word_delimiter_token": "|"
|
10 |
-
}
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": "/root/.cache/huggingface/transformers/fe018b0661558cf92ff53f5e133cdbd4aaaa961c3d1b0c3375bc94110265f845.fea372b8528a479b7415f13ca4e27a2f5f3782cbb3f15b4d19bb3cbe734e8137", "name_or_path": "saribalgar/wav2vec2-base-timit-ms", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|