add eval
Browse files- .ipynb_checkpoints/added_tokens-checkpoint.json +1 -0
- .ipynb_checkpoints/alphabet-checkpoint.json +1 -1
- .ipynb_checkpoints/eval-checkpoint.py +3 -0
- .ipynb_checkpoints/log_openslr_SLR53_train[95%:]_predictions-checkpoint.txt +0 -0
- .ipynb_checkpoints/log_openslr_SLR53_train[95%:]_targets-checkpoint.txt +0 -0
- .ipynb_checkpoints/openslr_SLR53_train[95%:]_eval_results-checkpoint.txt +2 -0
- .ipynb_checkpoints/special_tokens_map-checkpoint.json +1 -0
- alphabet.json +1 -1
- eval.py +3 -0
- log_openslr_SLR53_train[95%:]_predictions.txt +0 -0
- openslr_SLR53_train[95%:]_eval_results.txt +2 -2
.ipynb_checkpoints/added_tokens-checkpoint.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 110, "</s>": 111}
|
.ipynb_checkpoints/alphabet-checkpoint.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "
|
|
|
1 |
+
{"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "", "", "œ", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "", "", "", "⁇", "", "<s>", "</s>"], "is_bpe": false}
|
.ipynb_checkpoints/eval-checkpoint.py
CHANGED
@@ -87,6 +87,9 @@ def main(args):
|
|
87 |
model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
|
88 |
processor = transformers.AutoProcessor.from_pretrained(args.model_id)
|
89 |
vocab_dict = processor.tokenizer.get_vocab()
|
|
|
|
|
|
|
90 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
91 |
print(list(sorted_vocab_dict))
|
92 |
|
|
|
87 |
model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
|
88 |
processor = transformers.AutoProcessor.from_pretrained(args.model_id)
|
89 |
vocab_dict = processor.tokenizer.get_vocab()
|
90 |
+
print(list(processor.tokenizer.get_vocab().keys()))
|
91 |
+
print('decoder')
|
92 |
+
print(processor.decoder._alphabet.labels)
|
93 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
94 |
print(list(sorted_vocab_dict))
|
95 |
|
.ipynb_checkpoints/log_openslr_SLR53_train[95%:]_predictions-checkpoint.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
.ipynb_checkpoints/log_openslr_SLR53_train[95%:]_targets-checkpoint.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
.ipynb_checkpoints/openslr_SLR53_train[95%:]_eval_results-checkpoint.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
WER: 0.15322879016421437
|
2 |
+
CER: 0.03413696666806267
|
.ipynb_checkpoints/special_tokens_map-checkpoint.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
alphabet.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "
|
|
|
1 |
+
{"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "", "", "œ", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "", "", "", "⁇", "", "<s>", "</s>"], "is_bpe": false}
|
eval.py
CHANGED
@@ -87,6 +87,9 @@ def main(args):
|
|
87 |
model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
|
88 |
processor = transformers.AutoProcessor.from_pretrained(args.model_id)
|
89 |
vocab_dict = processor.tokenizer.get_vocab()
|
|
|
|
|
|
|
90 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
91 |
print(list(sorted_vocab_dict))
|
92 |
|
|
|
87 |
model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
|
88 |
processor = transformers.AutoProcessor.from_pretrained(args.model_id)
|
89 |
vocab_dict = processor.tokenizer.get_vocab()
|
90 |
+
print(list(processor.tokenizer.get_vocab().keys()))
|
91 |
+
print('decoder')
|
92 |
+
print(processor.decoder._alphabet.labels)
|
93 |
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
94 |
print(list(sorted_vocab_dict))
|
95 |
|
log_openslr_SLR53_train[95%:]_predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
openslr_SLR53_train[95%:]_eval_results.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
WER: 0.
|
2 |
-
CER: 0.
|
|
|
1 |
+
WER: 0.15322879016421437
|
2 |
+
CER: 0.03413696666806267
|