Update eval.py
Browse files
eval.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Dict
|
|
6 |
import torch
|
7 |
from datasets import Audio, Dataset, load_dataset, load_metric
|
8 |
from num2words import num2words as n2w
|
|
|
9 |
|
10 |
from transformers import AutoFeatureExtractor, AutoModelForCTC, pipeline, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM, Wav2Vec2FeatureExtractor
|
11 |
# from pyctcdecode import BeamSearchDecoderCTC
|
@@ -17,7 +18,7 @@ def log_results(result: Dataset, args: Dict[str, str]):
|
|
17 |
log_outputs = args.log_outputs
|
18 |
lm = "withLM" if args.use_lm else "noLM"
|
19 |
model_id = args.model_id.replace("/", "_").replace(".", "")
|
20 |
-
dataset_id = "_".join([model_id] + args.dataset.split("/") + [args.config, args.split, lm])
|
21 |
|
22 |
# load metric
|
23 |
wer = load_metric("wer")
|
@@ -203,7 +204,7 @@ if __name__ == "__main__":
|
|
203 |
"--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
|
204 |
)
|
205 |
parser.add_argument(
|
206 |
-
"--filter", type=str, default="", help="Simple filter on attributes. *E.g.* `region_of_youth:Troms` would
|
207 |
)
|
208 |
parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
|
209 |
parser.add_argument(
|
|
|
6 |
import torch
|
7 |
from datasets import Audio, Dataset, load_dataset, load_metric
|
8 |
from num2words import num2words as n2w
|
9 |
+
from slugify import slugify
|
10 |
|
11 |
from transformers import AutoFeatureExtractor, AutoModelForCTC, pipeline, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM, Wav2Vec2FeatureExtractor
|
12 |
# from pyctcdecode import BeamSearchDecoderCTC
|
|
|
18 |
log_outputs = args.log_outputs
|
19 |
lm = "withLM" if args.use_lm else "noLM"
|
20 |
model_id = args.model_id.replace("/", "_").replace(".", "")
|
21 |
+
dataset_id = "_".join([model_id] + args.dataset.split("/") + [args.config, slugify(args.filter), args.split, lm])
|
22 |
|
23 |
# load metric
|
24 |
wer = load_metric("wer")
|
|
|
204 |
"--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
|
205 |
)
|
206 |
parser.add_argument(
|
207 |
+
"--filter", type=str, default="", help="Simple filter on attributes. *E.g.* `region_of_youth:Troms` would pnly keep those samplesfor which the condition is met"
|
208 |
)
|
209 |
parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
|
210 |
parser.add_argument(
|