Update README.md
Browse files
README.md
CHANGED
@@ -56,7 +56,7 @@ def speech_file_to_array_fn(batch):
|
|
56 |
return batch
|
57 |
|
58 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
59 |
-
inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
|
60 |
|
61 |
with torch.no_grad():
|
62 |
logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
|
@@ -64,7 +64,7 @@ with torch.no_grad():
|
|
64 |
predicted_ids = torch.argmax(logits, dim=-1)
|
65 |
|
66 |
print("Prediction:", processor.batch_decode(predicted_ids))
|
67 |
-
print("Reference:", test_dataset["sentence"][:2])
|
68 |
```
|
69 |
|
70 |
|
@@ -114,7 +114,7 @@ processor = Wav2Vec2Processor.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-ch
|
|
114 |
model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
|
115 |
model.to("cuda")
|
116 |
|
117 |
-
chars_to_ignore_regex = '[
|
118 |
|
119 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
120 |
|
|
|
56 |
return batch
|
57 |
|
58 |
test_dataset = test_dataset.map(speech_file_to_array_fn)
|
59 |
+
inputs = processor(test_dataset[:2]["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
|
60 |
|
61 |
with torch.no_grad():
|
62 |
logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
|
|
|
64 |
predicted_ids = torch.argmax(logits, dim=-1)
|
65 |
|
66 |
print("Prediction:", processor.batch_decode(predicted_ids))
|
67 |
+
print("Reference:", test_dataset[:2]["sentence"])
|
68 |
```
|
69 |
|
70 |
|
|
|
114 |
model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
|
115 |
model.to("cuda")
|
116 |
|
117 |
+
chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\.\\\\⋯\\\\!\\\\-\\\\:\\\\–\\\\。\\\\》\\\\,\\\\)\\\\,\\\\?\\\\;\\\\~\\\\~\\\\…\\\\︰\\\\,\\\\(\\\\」\\\\‧\\\\《\\\\﹔\\\\、\\\\—\\\\/\\\\,\\\\「\\\\﹖\\\\·\\\\×\\\\̃\\\\̌\\\\ε\\\\λ\\\\μ\\\\и\\\\т\\\\─\\\\□\\\\〈\\\\〉\\\\『\\\\』\\\\ア\\\\オ\\\\カ\\\\チ\\\\ド\\\\ベ\\\\ャ\\\\ヤ\\\\ン\\\\・\\\\丶\\\\a\\\\b\\\\f\\\\g\\\\i\\\\n\\\\p\\\\t' + "\\\\']"
|
118 |
|
119 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
120 |
|