anas committed on
Commit
0594d22
1 Parent(s): 0e6facf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +18 -18
README.md CHANGED
@@ -52,15 +52,15 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
52
  # Preprocessing the datasets.
53
  # We need to read the audio files as arrays
54
  def speech_file_to_array_fn(batch):
55
- speech_array, sampling_rate = torchaudio.load(batch["path"])
56
- batch["speech"] = resampler(speech_array).squeeze().numpy()
57
- return batch
58
 
59
  test_dataset = test_dataset.map(speech_file_to_array_fn)
60
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
61
 
62
  with torch.no_grad():
63
- logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
64
 
65
  predicted_ids = torch.argmax(logits, dim=-1)
66
 
@@ -87,41 +87,41 @@ processor = Wav2Vec2Processor.from_pretrained("anas/wav2vec2-large-xlsr-arabic")
87
  model = Wav2Vec2ForCTC.from_pretrained("anas/wav2vec2-large-xlsr-arabic/")
88
  model.to("cuda")
89
 
90
- chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“]' # TODO: adapt this list to include all special characters you removed from the data
91
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
92
 
93
  # Preprocessing the datasets.
94
  # We need to read the audio files as arrays
95
  def speech_file_to_array_fn(batch):
96
- batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
97
- speech_array, sampling_rate = torchaudio.load(batch["path"])
98
- batch["speech"] = resampler(speech_array).squeeze().numpy()
99
- return batch
100
 
101
  test_dataset = test_dataset.map(speech_file_to_array_fn)
102
 
103
  # Preprocessing the datasets.
104
  # We need to read the audio files as arrays
105
  def evaluate(batch):
106
- inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
107
 
108
- with torch.no_grad():
109
- logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
110
 
111
- pred_ids = torch.argmax(logits, dim=-1)
112
- batch["pred_strings"] = processor.batch_decode(pred_ids)
113
- return batch
114
 
115
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
116
 
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
- **Test Result**: XX.XX % # TODO: write output of print here. IMPORTANT: Please remember to also replace {wer_result_on_test} at the top of with this value here. tags.
121
 
122
 
123
  ## Training
124
 
125
- The Common Voice `train`, `validation`, and ... datasets were used for training as well as ... and ... # TODO: adapt to state all the datasets that were used for training.
126
 
127
- The script used for training can be found [here](...) # TODO: fill in a link to your training script here. If you trained your model in a colab, simply fill in the link here. If you trained the model locally, it would be great if you could upload the training script on github and paste the link here.
 
52
  # Preprocessing the datasets.
53
  # We need to read the audio files as arrays
54
  def speech_file_to_array_fn(batch):
55
+ \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
56
+ \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
57
+ \treturn batch
58
 
59
  test_dataset = test_dataset.map(speech_file_to_array_fn)
60
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
61
 
62
  with torch.no_grad():
63
+ \tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
64
 
65
  predicted_ids = torch.argmax(logits, dim=-1)
66
 
 
87
  model = Wav2Vec2ForCTC.from_pretrained("anas/wav2vec2-large-xlsr-arabic/")
88
  model.to("cuda")
89
 
90
+ chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]' # TODO: adapt this list to include all special characters you removed from the data
91
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
92
 
93
  # Preprocessing the datasets.
94
  # We need to read the audio files as arrays
95
  def speech_file_to_array_fn(batch):
96
+ \tbatch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
97
+ \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
98
+ \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
99
+ \treturn batch
100
 
101
  test_dataset = test_dataset.map(speech_file_to_array_fn)
102
 
103
  # Preprocessing the datasets.
104
  # We need to read the audio files as arrays
105
  def evaluate(batch):
106
+ \tinputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
107
 
108
+ \twith torch.no_grad():
109
+ \t\tlogits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
110
 
111
+ \tpred_ids = torch.argmax(logits, dim=-1)
112
+ \tbatch["pred_strings"] = processor.batch_decode(pred_ids)
113
+ \treturn batch
114
 
115
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
116
 
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
+ **Test Result**: XX.XX %
121
 
122
 
123
  ## Training
124
 
125
+ The Common Voice `train` and `validation` datasets were used for training
126
 
127
+ The script used for training can be found [here](...)