Jobaula committed on
Commit
5ff915f
1 Parent(s): ed3f45f

Model save

Browse files
Files changed (2) hide show
  1. README.md +28 -29
  2. generation_config.json +30 -6
README.md CHANGED
@@ -1,43 +1,42 @@
1
  ---
2
- language:
3
- - zh
4
  license: apache-2.0
 
5
  tags:
6
- - whisper-event
7
  - generated_from_trainer
8
  datasets:
9
- - mozilla-foundation/common_voice_11_0
10
  metrics:
11
  - wer
12
- base_model: openai/whisper-medium
13
  model-index:
14
- - name: Whisper medium nan-tw only char
15
  results:
16
  - task:
17
- type: automatic-speech-recognition
18
  name: Automatic Speech Recognition
 
19
  dataset:
20
- name: mozilla-foundation/common_voice_11_0 nan-tw
21
- type: mozilla-foundation/common_voice_11_0
22
  config: nan-tw
23
  split: test
24
  args: nan-tw
25
  metrics:
26
- - type: wer
27
- value: 45.2824427480916
28
- name: Wer
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
32
  should probably proofread and complete it, then remove this comment. -->
33
 
34
- # Whisper medium nan-tw only char
35
 
36
- This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the mozilla-foundation/common_voice_11_0 nan-tw dataset.
37
  It achieves the following results on the evaluation set:
38
- - Loss: 0.9944
39
- - Wer: 45.2824
40
- - Cer: 45.3667
 
41
 
42
  ## Model description
43
 
@@ -60,7 +59,7 @@ The following hyperparameters were used during training:
60
  - train_batch_size: 2
61
  - eval_batch_size: 2
62
  - seed: 42
63
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
64
  - lr_scheduler_type: linear
65
  - lr_scheduler_warmup_steps: 500
66
  - training_steps: 5000
@@ -68,18 +67,18 @@ The following hyperparameters were used during training:
68
 
69
  ### Training results
70
 
71
- | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
72
- |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|
73
- | 0.5832 | 1.04 | 1000 | 1.0634 | 56.3053 | 56.4745 |
74
- | 0.1467 | 2.08 | 2000 | 1.0407 | 50.9618 | 51.0112 |
75
- | 0.016 | 3.13 | 3000 | 1.0226 | 46.4427 | 46.5137 |
76
- | 0.0001 | 5.01 | 4000 | 0.9974 | 45.4656 | 45.6082 |
77
- | 0.0001 | 6.05 | 5000 | 0.9944 | 45.2824 | 45.3667 |
78
 
79
 
80
  ### Framework versions
81
 
82
- - Transformers 4.27.0.dev0
83
- - Pytorch 1.13.1+cu117
84
- - Datasets 2.8.0
85
- - Tokenizers 0.13.2
 
1
  ---
2
+ library_name: transformers
 
3
  license: apache-2.0
4
+ base_model: openai/whisper-medium
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
+ - audiofolder
9
  metrics:
10
  - wer
 
11
  model-index:
12
+ - name: openai/whisper-medium
13
  results:
14
  - task:
 
15
  name: Automatic Speech Recognition
16
+ type: automatic-speech-recognition
17
  dataset:
18
+ name: audiofolder
19
+ type: audiofolder
20
  config: nan-tw
21
  split: test
22
  args: nan-tw
23
  metrics:
24
+ - name: Wer
25
+ type: wer
26
+ value: 0.9615384615384616
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
+ # openai/whisper-medium
33
 
34
+ This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the audiofolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.0141
37
+ - Model Preparation Time: 0.0121
38
+ - Wer: 0.9615
39
+ - Cer: 0.9524
40
 
41
  ## Model description
42
 
 
59
  - train_batch_size: 2
60
  - eval_batch_size: 2
61
  - seed: 42
62
+ - optimizer: Use adamw_bnb_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
63
  - lr_scheduler_type: linear
64
  - lr_scheduler_warmup_steps: 500
65
  - training_steps: 5000
 
67
 
68
  ### Training results
69
 
70
+ | Training Loss | Epoch | Step | Validation Loss | Model Preparation Time | Wer | Cer |
71
+ |:-------------:|:------:|:----:|:---------------:|:----------------------:|:-------:|:-------:|
72
+ | 0.97 | 0.2 | 1000 | 0.7356 | 0.0121 | 38.1731 | 38.4762 |
73
+ | 0.3044 | 1.0388 | 2000 | 0.3099 | 0.0121 | 23.4615 | 23.9048 |
74
+ | 0.3108 | 1.2388 | 3000 | 0.1153 | 0.0121 | 7.5 | 7.7143 |
75
+ | 0.0544 | 2.0776 | 4000 | 0.0295 | 0.0121 | 2.3077 | 2.2857 |
76
+ | 0.0678 | 2.2776 | 5000 | 0.0141 | 0.0121 | 0.9615 | 0.9524 |
77
 
78
 
79
  ### Framework versions
80
 
81
+ - Transformers 4.47.0.dev0
82
+ - Pytorch 2.5.1+cu121
83
+ - Datasets 3.1.0
84
+ - Tokenizers 0.20.3
generation_config.json CHANGED
@@ -1,4 +1,30 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "begin_suppress_tokens": [
3
  220,
4
  50257
@@ -14,10 +40,6 @@
14
  [
15
  2,
16
  50359
17
- ],
18
- [
19
- 3,
20
- 50363
21
  ]
22
  ],
23
  "is_multilingual": true,
@@ -122,10 +144,12 @@
122
  "<|yo|>": 50325,
123
  "<|zh|>": 50260
124
  },
125
- "max_initial_timestamp_index": 1,
126
  "max_length": 448,
127
  "no_timestamps_token_id": 50363,
128
  "pad_token_id": 50257,
 
 
129
  "suppress_tokens": [
130
  1,
131
  2,
@@ -220,5 +244,5 @@
220
  "transcribe": 50359,
221
  "translate": 50358
222
  },
223
- "transformers_version": "4.27.0.dev0"
224
  }
 
1
  {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
  "begin_suppress_tokens": [
29
  220,
30
  50257
 
40
  [
41
  2,
42
  50359
 
 
 
 
43
  ]
44
  ],
45
  "is_multilingual": true,
 
144
  "<|yo|>": 50325,
145
  "<|zh|>": 50260
146
  },
147
+ "max_initial_timestamp_index": 50,
148
  "max_length": 448,
149
  "no_timestamps_token_id": 50363,
150
  "pad_token_id": 50257,
151
+ "prev_sot_token_id": 50361,
152
+ "return_timestamps": false,
153
  "suppress_tokens": [
154
  1,
155
  2,
 
244
  "transcribe": 50359,
245
  "translate": 50358
246
  },
247
+ "transformers_version": "4.47.0.dev0"
248
  }