arijitx committed
Commit e35c09b
1 Parent(s): 9a96f0d
.ipynb_checkpoints/added_tokens-checkpoint.json ADDED
@@ -0,0 +1 @@
+ {"<s>": 110, "</s>": 111}
.ipynb_checkpoints/alphabet-checkpoint.json CHANGED
@@ -1 +1 @@
- {"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "\u200c", "\u200d", "\u200e", "[pad]", "<s>", "</s>"], "is_bpe": false}
+ {"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "“", "”", "œ", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "", "", "", "", "", "<s>", "</s>"], "is_bpe": false}
.ipynb_checkpoints/eval-checkpoint.py CHANGED
@@ -87,6 +87,9 @@ def main(args):
  model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
  processor = transformers.AutoProcessor.from_pretrained(args.model_id)
  vocab_dict = processor.tokenizer.get_vocab()
+ print(list(processor.tokenizer.get_vocab().keys()))
+ print('decoder')
+ print(processor.decoder._alphabet.labels)
  sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
  print(list(sorted_vocab_dict))
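The three added print calls expose the tokenizer vocabulary next to the pyctcdecode decoder's label list so the two can be compared by eye. A hedged sketch of the equivalent programmatic check (check_vocab_alignment is a hypothetical helper; it assumes, as the diff does, that processor is a Wav2Vec2ProcessorWithLM whose decoder keeps its labels on the private _alphabet attribute):

    # Sketch: LM-boosted CTC decoding only works if tokenizer IDs and
    # decoder labels line up index for index. The .lower() mirrors the
    # k.lower() normalization used in the sorted_vocab_dict line above.
    def check_vocab_alignment(processor):
        vocab = processor.tokenizer.get_vocab()
        tokenizer_labels = [k for k, _ in sorted(vocab.items(), key=lambda item: item[1])]
        decoder_labels = processor.decoder._alphabet.labels
        for i, (t, d) in enumerate(zip(tokenizer_labels, decoder_labels)):
            if t.lower() != d.lower():
                print(f"mismatch at index {i}: tokenizer={t!r} decoder={d!r}")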
.ipynb_checkpoints/log_openslr_SLR53_train[95%:]_predictions-checkpoint.txt ADDED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/log_openslr_SLR53_train[95%:]_targets-checkpoint.txt ADDED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/openslr_SLR53_train[95%:]_eval_results-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
+ WER: 0.15322879016421437
+ CER: 0.03413696666806267
.ipynb_checkpoints/special_tokens_map-checkpoint.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
alphabet.json CHANGED
@@ -1 +1 @@
- {"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "\u200c", "\u200d", "\u200e", "[pad]", "<s>", "</s>"], "is_bpe": false}
+ {"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "“", "”", "œ", "।", "ঁ", "ং", "ঃ", "অ", "আ", "ই", "ঈ", "উ", "ঊ", "ঋ", "এ", "ঐ", "ও", "ঔ", "ক", "খ", "গ", "ঘ", "ঙ", "চ", "ছ", "জ", "ঝ", "ঞ", "ট", "ঠ", "ড", "ঢ", "ণ", "ত", "থ", "দ", "ধ", "ন", "প", "ফ", "ব", "ভ", "ম", "য", "র", "ল", "শ", "ষ", "স", "হ", "়", "া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "্", "ৎ", "ৗ", "ড়", "ঢ়", "য়", "০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯", "ৰ", "", "", "", "", "", "<s>", "</s>"], "is_bpe": false}
eval.py CHANGED
@@ -87,6 +87,9 @@ def main(args):
  model = transformers.Wav2Vec2ForCTC.from_pretrained(args.model_id)
  processor = transformers.AutoProcessor.from_pretrained(args.model_id)
  vocab_dict = processor.tokenizer.get_vocab()
+ print(list(processor.tokenizer.get_vocab().keys()))
+ print('decoder')
+ print(processor.decoder._alphabet.labels)
  sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
  print(list(sorted_vocab_dict))
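For context, a hedged end-to-end sketch of the decode path this eval script exercises. All names below are placeholders, and it assumes AutoProcessor resolves to a Wav2Vec2ProcessorWithLM here (i.e. the repo ships alphabet.json plus a language model); this is not the repository's eval code verbatim:

    import numpy as np
    import torch
    import transformers

    model_id = "."  # hypothetical: a local checkout of this repository
    model = transformers.Wav2Vec2ForCTC.from_pretrained(model_id)
    processor = transformers.AutoProcessor.from_pretrained(model_id)

    audio = np.zeros(16_000, dtype=np.float32)  # stand-in: 1 s of 16 kHz silence
    inputs = processor(audio, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits

    # Wav2Vec2ProcessorWithLM.batch_decode runs pyctcdecode beam search
    # over the raw logits (no argmax), applying the language model.
    print(processor.batch_decode(logits.numpy()).text)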
log_openslr_SLR53_train[95%:]_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
openslr_SLR53_train[95%:]_eval_results.txt CHANGED
@@ -1,2 +1,2 @@
- WER: 0.21726385291857586
- CER: 0.04725010353701041
+ WER: 0.15322879016421437
+ CER: 0.03413696666806267
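The committed results show WER dropping from about 0.217 to 0.153 and CER from about 0.047 to 0.034 on the openslr SLR53 train[95%:] split. A hedged sketch (not this repo's eval code) of recomputing such figures from the committed logs with jiwer, assuming a matching targets log sits next to the predictions log and both hold one transcript per line:

    import jiwer

    # Predictions file name as committed above; the targets file name and
    # the line-per-utterance layout are assumptions.
    with open("log_openslr_SLR53_train[95%:]_predictions.txt") as f:
        predictions = [line.strip() for line in f]
    with open("log_openslr_SLR53_train[95%:]_targets.txt") as f:
        targets = [line.strip() for line in f]

    print("WER:", jiwer.wer(targets, predictions))
    print("CER:", jiwer.cer(targets, predictions))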