geninhu committed on
Commit
1e42e53
1 Parent(s): 052904e

Training in progress, step 1000

Browse files
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:066a4bd36d31aed8395dd3cd4af6a1b5224e7f6bdce73c3d445243e4ad5fc29d
3
  size 3055754841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de3a6d93fb07b3b0d98ebec9911cf2e3ca0bdb3518687fb8882b35497d8c94d
3
  size 3055754841
run.sh CHANGED
@@ -5,13 +5,14 @@ python run_speech_recognition_seq2seq_streaming.py \
5
  --language="vietnamese" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
- --model_index_name="Whisper Medium Vietnamese Vivos" \
9
  --max_steps="5000" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="32" \
12
- --per_device_eval_batch_size="16" \
 
13
  --logging_steps="25" \
14
- --learning_rate="1e-6" \
15
  --warmup_steps="500" \
16
  --evaluation_strategy="steps" \
17
  --eval_steps="1000" \
@@ -25,7 +26,6 @@ python run_speech_recognition_seq2seq_streaming.py \
25
  --report_to="tensorboard" \
26
  --metric_for_best_model="wer" \
27
  --greater_is_better="False" \
28
- --gradient_accumulation_steps="1" \
29
  --load_best_model_at_end \
30
  --gradient_checkpointing \
31
  --fp16 \
@@ -34,5 +34,6 @@ python run_speech_recognition_seq2seq_streaming.py \
34
  --do_eval \
35
  --predict_with_generate \
36
  --do_normalize_eval \
 
37
  --use_auth_token \
38
- --push_to_hub
 
5
  --language="vietnamese" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
+ --model_index_name="Whisper Medium Vietnamese" \
9
  --max_steps="5000" \
10
  --output_dir="./" \
11
  --per_device_train_batch_size="32" \
12
+ --per_device_eval_batch_size="32" \
13
+ --gradient_accumulation_steps="2" \
14
  --logging_steps="25" \
15
+ --learning_rate="1e-5" \
16
  --warmup_steps="500" \
17
  --evaluation_strategy="steps" \
18
  --eval_steps="1000" \
 
26
  --report_to="tensorboard" \
27
  --metric_for_best_model="wer" \
28
  --greater_is_better="False" \
 
29
  --load_best_model_at_end \
30
  --gradient_checkpointing \
31
  --fp16 \
 
34
  --do_eval \
35
  --predict_with_generate \
36
  --do_normalize_eval \
37
+ --streaming \
38
  --use_auth_token \
39
+ --push_to_hub
run_speech_recognition_seq2seq_streaming.py CHANGED
@@ -415,7 +415,7 @@ def main():
415
  # 4. Load dataset
416
  dataset_names = ["mozilla-foundation/common_voice_11_0", "mozilla-foundation/common_voice_11_0", "vivos", "vivos"]
417
  dataset_config_names = ["vi", "vi", None, None]
418
- text_column_names = ["sentence", "sentence", "sentence", "sentence"]
419
  splits = ['train', 'validation', 'train', 'test']
420
 
421
  raw_datasets = IterableDatasetDict()
 
415
  # 4. Load dataset
416
  dataset_names = ["mozilla-foundation/common_voice_11_0", "mozilla-foundation/common_voice_11_0", "vivos", "vivos"]
417
  dataset_config_names = ["vi", "vi", None, None]
418
+ text_column_names = ["sentence", "sentence", "sentence", "sentence"]
419
  splits = ['train', 'validation', 'train', 'test']
420
 
421
  raw_datasets = IterableDatasetDict()
runs/Dec16_14-32-26_129-213-131-48/1671201160.8968897/events.out.tfevents.1671201160.129-213-131-48.2969237.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fbefe6b792f6d3495e9600d5939f5bf16679802daa37806cf0dc028923e44b
3
+ size 5870
runs/Dec16_14-32-26_129-213-131-48/events.out.tfevents.1671201160.129-213-131-48.2969237.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37f2986e473f5ed8943f7ef3ea97e844574456634125bdfc60091ac1ad6b846
3
+ size 10899
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c7d8508659c5d42ba410fbec08857ff731f35eecf4e825059dfa8cb5e29cd5d
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4bd990ffd86596f3507ce3d35a1a36cc02e044793d691e47ee94fbf31352539
3
  size 3579