carlosdanielhernandezmena
committed on
Commit
•
6f22a10
1
Parent(s):
e782413
Updating the example code
Browse files
README.md
CHANGED
@@ -137,57 +137,56 @@ The fine-tuning process was performed during November (2022) on the servers of the
|
|
137 |
import torch
|
138 |
from transformers import Wav2Vec2Processor
|
139 |
from transformers import Wav2Vec2ForCTC
|
|
|
140 |
#Load the processor and model.
|
141 |
MODEL_NAME="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-spanish-ep5-944h"
|
142 |
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
|
143 |
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
|
|
|
144 |
#Load the dataset
|
145 |
from datasets import load_dataset, load_metric, Audio
|
146 |
ds=load_dataset("ciempiess/ciempiess_test", split="test")
|
147 |
-
|
148 |
-
import re
|
149 |
-
chars_to_ignore_regex = '[\\,\\?\\.\\!\\\;\\:\\"\\“\\%\\‘\\”\\�\\)\\(\\*)]'
|
150 |
-
def remove_special_characters(batch):
|
151 |
-
batch["normalized_text"] = re.sub(chars_to_ignore_regex, '', batch["normalized_text"]).lower()
|
152 |
-
return batch
|
153 |
-
ds = ds.map(remove_special_characters)
|
154 |
#Downsample to 16kHz
|
155 |
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
|
|
|
156 |
#Process the dataset
|
157 |
def prepare_dataset(batch):
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
|
|
|
165 |
#Define the evaluation metric
|
166 |
import numpy as np
|
167 |
wer_metric = load_metric("wer")
|
168 |
def compute_metrics(pred):
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
177 |
#Do the evaluation (with batch_size=1)
|
178 |
model = model.to(torch.device("cuda"))
|
179 |
def map_to_result(batch):
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
results = ds.map(map_to_result,remove_columns=ds.column_names)
|
188 |
-
#Compute the overall WER now.
|
189 |
-
print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["normalized_text"])))
|
190 |
|
|
|
|
|
191 |
```
|
192 |
**Test Result**: 0.112
|
193 |
# BibTeX entry and citation info
|
|
|
137 |
import torch
|
138 |
from transformers import Wav2Vec2Processor
|
139 |
from transformers import Wav2Vec2ForCTC
|
140 |
+
|
141 |
#Load the processor and model.
|
142 |
MODEL_NAME="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-spanish-ep5-944h"
|
143 |
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
|
144 |
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
|
145 |
+
|
146 |
#Load the dataset
|
147 |
from datasets import load_dataset, load_metric, Audio
|
148 |
ds=load_dataset("ciempiess/ciempiess_test", split="test")
|
149 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
#Downsample to 16kHz
|
151 |
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
|
152 |
+
|
153 |
#Process the dataset
|
154 |
def prepare_dataset(batch):
    """Turn one dataset example into model inputs and label ids.

    Args:
        batch: A single example holding an ``"audio"`` entry (with
            ``"array"`` and ``"sampling_rate"`` keys) and a
            ``"normalized_text"`` transcript.

    Returns:
        The same mapping with ``"input_values"`` and ``"labels"`` added.
    """
    audio = batch["audio"]
    # The processor output is batched; index 0 "un-batches" it so the
    # mapped column holds one sequence per example.
    batch["input_values"] = processor(
        audio["array"], sampling_rate=audio["sampling_rate"]
    ).input_values[0]
    # Tokenize the transcript with the processor's tokenizer directly instead
    # of the deprecated ``as_target_processor()`` context manager, which only
    # rerouted the call to that same tokenizer.
    batch["labels"] = processor.tokenizer(batch["normalized_text"]).input_ids
    return batch
|
161 |
ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
|
162 |
+
|
163 |
#Define the evaluation metric
|
164 |
import numpy as np
|
165 |
wer_metric = load_metric("wer")
|
166 |
def compute_metrics(pred):
    """Compute the word error rate for a set of predictions.

    Args:
        pred: An object exposing ``predictions`` (model logits) and
            ``label_ids`` (reference token ids). Presumably a transformers
            ``EvalPrediction`` -- confirm against the caller.

    Returns:
        A dict with a single ``"wer"`` entry.
    """
    # Label entries equal to -100 are overwritten (in place) with the
    # tokenizer's pad token id before they are decoded.
    placeholder_mask = pred.label_ids == -100
    pred.label_ids[placeholder_mask] = processor.tokenizer.pad_token_id

    # Pick the highest-scoring token id along the last axis and decode it.
    hypotheses = processor.batch_decode(np.argmax(pred.predictions, axis=-1))
    # We do not want to group tokens when decoding the references.
    references = processor.batch_decode(pred.label_ids, group_tokens=False)

    return {"wer": wer_metric.compute(predictions=hypotheses, references=references)}
|
175 |
+
|
176 |
#Do the evaluation (with batch_size=1)
|
177 |
model = model.to(torch.device("cuda"))
|
178 |
def map_to_result(batch):
    """Transcribe one processed example and decode its reference text.

    Args:
        batch: A single example with ``"input_values"`` (model input) and
            ``"labels"`` (token ids of the reference transcript).

    Returns:
        The same mapping with ``"pred_str"`` (the model's transcription) and
        ``"sentence"`` (the decoded reference) added.
    """
    with torch.no_grad():
        # Build the input on whatever device the model lives on rather than
        # hard-coding "cuda", so the tensor and the weights always agree.
        input_values = torch.tensor(
            batch["input_values"], device=model.device
        ).unsqueeze(0)  # add a leading batch dimension of size 1
        logits = model(input_values).logits

    # Take the highest-scoring token id along the last axis.
    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_str"] = processor.batch_decode(pred_ids)[0]
    # Reference labels are decoded without grouping repeated tokens.
    batch["sentence"] = processor.decode(batch["labels"], group_tokens=False)
    return batch
|
186 |
results = ds.map(map_to_result,remove_columns=ds.column_names)
|
|
|
|
|
187 |
|
188 |
+
#Compute the overall WER now.
|
189 |
+
print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["sentence"])))
|
190 |
```
|
191 |
**Test Result**: 0.112
|
192 |
# BibTeX entry and citation info
|