kiranpantha committed on
Commit a3daa47 · verified · 1 Parent(s): a669782

End of training

README.md CHANGED
@@ -11,7 +11,7 @@ datasets:
  metrics:
  - wer
  model-index:
- - name: Whisper Large v3 Nepali - Kiran Pantha
+ - name: Whisper Large v3 Turbo Nepali - Kiran Pantha
  results:
  - task:
    name: Automatic Speech Recognition
@@ -25,19 +25,19 @@ model-index:
  metrics:
  - name: Wer
    type: wer
- value: 20.48611111111111
+ value: 18.72503840245776
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->

- # Whisper Large v3 Nepali - Kiran Pantha
+ # Whisper Large v3 Turbo Nepali - Kiran Pantha

  This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the OpenSLR54 dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.1507
- - Wer: 20.4861
- - Cer: 4.9839
+ - Loss: 0.0876
+ - Wer: 18.7250
+ - Cer: 4.4861

  ## Model description

@@ -57,10 +57,10 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 1e-05
- - train_batch_size: 16
+ - train_batch_size: 8
  - eval_batch_size: 8
  - seed: 42
- - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_steps: 500
  - training_steps: 5000
@@ -70,27 +70,27 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch  | Step | Cer     | Validation Loss | Wer     |
  |:-------------:|:------:|:----:|:-------:|:---------------:|:-------:|
- | 0.209 | 0.3597 | 300 | 10.9916 | 0.2021 | 41.1574 |
- | 0.1714 | 0.7194 | 600 | 8.9474 | 0.1755 | 35.7176 |
- | 0.101 | 1.0791 | 900 | 7.5565 | 0.1485 | 29.6991 |
- | 0.0902 | 1.4388 | 1200 | 7.2372 | 0.1396 | 28.2407 |
- | 0.0872 | 1.7986 | 1500 | 7.8024 | 0.1319 | 27.9861 |
- | 0.0453 | 2.1583 | 1800 | 6.3344 | 0.1374 | 26.2269 |
- | 0.0368 | 2.5180 | 2100 | 6.1766 | 0.1381 | 25.2315 |
- | 0.0472 | 2.8777 | 2400 | 5.8316 | 0.1316 | 24.1435 |
- | 0.0191 | 3.2374 | 2700 | 5.8059 | 0.1356 | 24.0278 |
- | 0.0185 | 3.5971 | 3000 | 5.5674 | 0.1376 | 23.125 |
- | 0.0182 | 3.9568 | 3300 | 5.5123 | 0.1360 | 23.0556 |
- | 0.0074 | 4.3165 | 3600 | 5.2077 | 0.1428 | 21.7130 |
- | 0.0086 | 4.6763 | 3900 | 5.1784 | 0.1433 | 21.2731 |
- | 0.0031 | 5.0360 | 4200 | 5.0279 | 0.1421 | 21.1806 |
- | 0.0024 | 5.3957 | 4500 | 4.9912 | 0.1482 | 20.7870 |
- | 0.0014 | 5.7554 | 4800 | 4.9839 | 0.1507 | 20.4861 |
+ | 0.2266 | 0.1200 | 300 | 11.9034 | 0.2345 | 44.7619 |
+ | 0.208 | 0.2399 | 600 | 11.3157 | 0.2132 | 41.1060 |
+ | 0.185 | 0.3599 | 900 | 9.4204 | 0.1753 | 35.6068 |
+ | 0.1567 | 0.4798 | 1200 | 8.8596 | 0.1634 | 33.9324 |
+ | 0.1411 | 0.5998 | 1500 | 8.7004 | 0.1523 | 33.0568 |
+ | 0.1377 | 0.7197 | 1800 | 7.3120 | 0.1371 | 29.7849 |
+ | 0.1147 | 0.8397 | 2100 | 7.0010 | 0.1332 | 27.7112 |
+ | 0.1116 | 0.9596 | 2400 | 6.5798 | 0.1212 | 26.3287 |
+ | 0.0757 | 1.0796 | 2700 | 6.1268 | 0.1193 | 24.7773 |
+ | 0.0609 | 1.1995 | 3000 | 5.8991 | 0.1154 | 24.6237 |
+ | 0.0612 | 1.3195 | 3300 | 5.2599 | 0.1091 | 22.0737 |
+ | 0.0627 | 1.4394 | 3600 | 5.3579 | 0.1045 | 21.6283 |
+ | 0.0582 | 1.5594 | 3900 | 5.1938 | 0.0995 | 21.5054 |
+ | 0.0551 | 1.6793 | 4200 | 4.7947 | 0.0956 | 19.8771 |
+ | 0.052 | 1.7993 | 4500 | 4.5473 | 0.0897 | 19.1244 |
+ | 0.0438 | 1.9192 | 4800 | 4.4861 | 0.0876 | 18.7250 |


  ### Framework versions

- - Transformers 4.46.3
+ - Transformers 4.47.1
  - Pytorch 2.5.1+cxx11.abi
  - Datasets 3.2.0
- - Tokenizers 0.20.3
+ - Tokenizers 0.21.0
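
For reference, here is a minimal sketch of scoring WER and CER for a single utterance with the fine-tuned checkpoint, using `transformers` and `evaluate`. The repo id and audio path are placeholder assumptions, not taken from this commit.

```python
# Hedged sketch: transcribe one clip and score WER/CER against a reference.
# Repo id and audio path below are placeholders, not confirmed by this commit.
import evaluate
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="kiranpantha/whisper-large-v3-nepali",  # placeholder repo id
    chunk_length_s=30,
    generate_kwargs={"language": "ne", "task": "transcribe"},
)

prediction = asr("sample_nepali.wav")["text"]  # placeholder audio file
reference = "<ground-truth Nepali transcript>"

wer = evaluate.load("wer")  # both metrics are backed by jiwer
cer = evaluate.load("cer")
print("WER:", 100 * wer.compute(predictions=[prediction], references=[reference]))
print("CER:", 100 * cer.compute(predictions=[prediction], references=[reference]))
```

The reported card metrics come from the full evaluation split, so a single-clip score like this is only a sanity check, not a reproduction.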
generation_config.json CHANGED
@@ -48,16 +48,6 @@
  "bos_token_id": 50257,
  "decoder_start_token_id": 50258,
  "eos_token_id": 50257,
- "forced_decoder_ids": [
-   [
-     1,
-     null
-   ],
-   [
-     2,
-     50360
-   ]
- ],
  "is_multilingual": true,
  "lang_to_id": {
    "<|af|>": 50327,
@@ -263,5 +253,5 @@
  "transcribe": 50360,
  "translate": 50359
  },
- "transformers_version": "4.46.3"
+ "transformers_version": "4.47.1"
  }
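
With `forced_decoder_ids` removed from `generation_config.json`, recent `transformers` releases expect the language and task to be passed at generation time instead. A hedged sketch, reusing the placeholder repo id from above:

```python
# Hedged sketch: choose language/task per call instead of via forced_decoder_ids.
# Repo id is a placeholder assumption; the audio is a dummy silent clip.
import numpy as np
from transformers import WhisperForConditionalGeneration, WhisperProcessor

model_id = "kiranpantha/whisper-large-v3-nepali"  # placeholder repo id
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

# Placeholder audio: one second of silence at 16 kHz; substitute real speech.
audio = np.zeros(16000, dtype=np.float32)
input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features

# Language and task are supplied here rather than baked into the generation config.
generated_ids = model.generate(input_features, language="ne", task="transcribe")
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```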
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5bba7fa23d3a06d94ee319279e4e28ea4089eb212f82300fe72da6bfd14f5ed1
+ oid sha256:6f883dcabd631321110701e05f61994998c3ca7ce015c9161ac9fe7d7e036b91
  size 4993448880
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9eaf523e50340add3b139a6f6a53c51c571c74852871e6b4e93a553268ce55a3
+ oid sha256:89b5bb57ec7c4e57d7cb9a440a3c2c772ddaafa7122c5869415540e3c1516703
  size 1180663192
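
Both safetensors shards were replaced in this commit. A sketch of one way to check that locally downloaded shards match the `oid` digests in the updated LFS pointers above; the local paths are assumptions (shards in the current directory).

```python
# Hedged sketch: recompute SHA-256 digests of downloaded shards and compare them
# with the oid values from the updated LFS pointers. Local paths are assumptions.
import hashlib

EXPECTED = {
    "model-00001-of-00002.safetensors": "6f883dcabd631321110701e05f61994998c3ca7ce015c9161ac9fe7d7e036b91",
    "model-00002-of-00002.safetensors": "89b5bb57ec7c4e57d7cb9a440a3c2c772ddaafa7122c5869415540e3c1516703",
}

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

for name, expected in EXPECTED.items():
    status = "OK" if sha256_of(name) == expected else "MISMATCH"
    print(f"{name}: {status}")
```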