pere committed on
Commit
f45805f
·
1 Parent(s): d740190
Files changed (2) hide show
  1. run.sh +3 -2
  2. run_speech_recognition_ctc.py +1 -1
run.sh CHANGED
@@ -1,10 +1,11 @@
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="NbAiLab/NPSC" \
3
  --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
4
- --dataset_config_name="16K_mp3" \
 
5
  --output_dir="./" \
6
  --overwrite_output_dir \
7
- --num_train_epochs="50" \
8
  --per_device_train_batch_size="8" \
9
  --per_device_eval_batch_size="8" \
10
  --gradient_accumulation_steps="4" \
 
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="NbAiLab/NPSC" \
3
  --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
4
+ --hub_model_id="NbAiLab/wav2vec2-xlsr-300M-NPSC" \
5
+ --dataset_config_name="16K_mp3" \
6
  --output_dir="./" \
7
  --overwrite_output_dir \
8
+ --num_train_epochs="75" \
9
  --per_device_train_batch_size="8" \
10
  --per_device_eval_batch_size="8" \
11
  --gradient_accumulation_steps="4" \
run_speech_recognition_ctc.py CHANGED
@@ -393,7 +393,7 @@ def main():
393
 
394
  # Pre-processing dataset
395
  import re
396
- def filter_inaudible(entry):
397
  return not re.search("\d|<inaudible>", entry["text"], flags=re.IGNORECASE)
398
 
399
  def filter_nynorsk(entry):
 
393
 
394
  # Pre-processing dataset
395
  import re
396
+ def filter_inaudible(entry):
397
  return not re.search("\d|<inaudible>", entry["text"], flags=re.IGNORECASE)
398
 
399
  def filter_nynorsk(entry):