infinitejoy
/

wav2vec2-large-xls-r-300m-assamese

Automatic Speech Recognition

xlsr-fine-tuning

robust-speech-event

hf-asr-leaderboard

Model card · Files and versions · Training metrics · Community

infinitejoy committed on Feb 11, 2022

Commit

f49535d

·

1 Parent(s): 08fd86d

Upload eval.py

Files changed (1) hide show

eval.py +11 -13

eval.py CHANGED Viewed

@@ -3,6 +3,7 @@ import argparse
 import re
 from typing import Dict
 from datasets import Audio, Dataset, load_dataset, load_metric
 from transformers import AutoFeatureExtractor, pipeline
@@ -51,18 +52,7 @@ def normalize_text(text: str) -> str:
     chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
-    text = re.sub(chars_to_ignore_regex, "", text.lower()) \
-        .replace("\\\\punkt", "") \
-        .replace("\\\\komma", "") \
-        .replace("è", "e") \
-        .replace("é", "e") \
-        .replace("î", "i") \
-        .replace("ü", "u") \
-        .replace("ÿ", "y") \
-        .replace("ô", "o") \
-        .replace("\\", "") \
-        .replace("/", "") \
-        .replace("|", "")
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
@@ -89,7 +79,9 @@ def main(args):
     dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
     # load eval pipeline
-    asr = pipeline("automatic-speech-recognition", model=args.model_id)
     # map function to decode audio
     def map_to_pred(batch):
@@ -134,6 +126,12 @@ if __name__ == "__main__":
     parser.add_argument(
         "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
     )
     args = parser.parse_args()
     main(args)

 import re
 from typing import Dict
+import torch
 from datasets import Audio, Dataset, load_dataset, load_metric
 from transformers import AutoFeatureExtractor, pipeline
     chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
+    text = re.sub(chars_to_ignore_regex, "", text.lower())
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
     dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
     # load eval pipeline
+    if args.device is None:
+        args.device = 0 if torch.cuda.is_available() else -1
+    asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
     # map function to decode audio
     def map_to_pred(batch):
     parser.add_argument(
         "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
     )
+    parser.add_argument(
+        "--device",
+        type=int,
+        default=None,
+        help="The device to run the pipeline on. -1 for CPU (default), 0 for the first GPU and so on.",
+    )
     args = parser.parse_args()
     main(args)