jonatasgrosman
committed on
Commit
•
ab1e0f4
1
Parent(s):
d12252b
update README
Browse files
README.md
CHANGED
@@ -27,7 +27,7 @@ model-index:
|
|
27 |
value: 13.60
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
-
value:
|
31 |
---
|
32 |
|
33 |
# Wav2Vec2-Large-XLSR-53-Dutch
|
@@ -101,12 +101,16 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
101 |
LANG_ID = "nl"
|
102 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-dutch"
|
103 |
DEVICE = "cuda"
|
|
|
104 |
|
105 |
CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
|
106 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
107 |
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
|
108 |
|
109 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
|
|
|
|
|
|
110 |
wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
|
111 |
cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
|
112 |
|
@@ -119,9 +123,11 @@ model.to(DEVICE)
|
|
119 |
# Preprocessing the datasets.
|
120 |
# We need to read the audio files as arrays
|
121 |
def speech_file_to_array_fn(batch):
|
122 |
-
|
123 |
-
|
|
|
124 |
batch["speech"] = speech_array
|
|
|
125 |
return batch
|
126 |
|
127 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
@@ -138,13 +144,13 @@ def evaluate(batch):
|
|
138 |
batch["pred_strings"] = processor.batch_decode(pred_ids)
|
139 |
return batch
|
140 |
|
141 |
-
result = test_dataset.map(evaluate, batched=True, batch_size=
|
142 |
|
143 |
-
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=
|
144 |
-
print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=
|
145 |
```
|
146 |
|
147 |
**Test Result**:
|
148 |
|
149 |
- WER: 13.60%
|
150 |
-
- CER:
|
|
|
27 |
value: 13.60
|
28 |
- name: Test CER
|
29 |
type: cer
|
30 |
+
value: 4.45
|
31 |
---
|
32 |
|
33 |
# Wav2Vec2-Large-XLSR-53-Dutch
|
|
|
101 |
LANG_ID = "nl"
|
102 |
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-dutch"
|
103 |
DEVICE = "cuda"
|
104 |
+
MAX_SAMPLES = 8000
|
105 |
|
106 |
CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
|
107 |
"؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
|
108 |
"=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
|
109 |
|
110 |
test_dataset = load_dataset("common_voice", LANG_ID, split="test")
|
111 |
+
if len(test_dataset) > MAX_SAMPLES:
|
112 |
+
test_dataset = test_dataset.select(range(MAX_SAMPLES))
|
113 |
+
|
114 |
wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
|
115 |
cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
|
116 |
|
|
|
123 |
# Preprocessing the datasets.
|
124 |
# We need to read the audio files as arrays
|
125 |
def speech_file_to_array_fn(batch):
|
126 |
+
with warnings.catch_warnings():
|
127 |
+
warnings.simplefilter("ignore")
|
128 |
+
speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
|
129 |
batch["speech"] = speech_array
|
130 |
+
batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).upper()
|
131 |
return batch
|
132 |
|
133 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
|
|
144 |
batch["pred_strings"] = processor.batch_decode(pred_ids)
|
145 |
return batch
|
146 |
|
147 |
+
result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
148 |
|
149 |
+
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
|
150 |
+
print("CER: {:2f}".format(100 * cer.compute(predictions=result["pred_strings"], references=result["sentence"], chunk_size=1000)))
|
151 |
```
|
152 |
|
153 |
**Test Result**:
|
154 |
|
155 |
- WER: 13.60%
|
156 |
+
- CER: 4.45%
|