huseinzol05 committed on
Commit
276857a
·
2 Parent(s): 57e8f8f d154450

Merge branch 'main' of https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed into main

Browse files
Files changed (2) hide show
  1. README.md +16 -16
  2. evaluate-gpu.ipynb +63 -151
README.md CHANGED
@@ -35,37 +35,37 @@ It achieves the following results on the evaluation set based on [evaluate-wav2v
35
  Mixed evaluation,
36
 
37
  ```
38
- CER: 0.04363189219453221
39
- WER: 0.12446419219809059
40
- CER with LM: 0.03621180629932558
41
- WER with LM: 0.09152993800218129
42
  ```
43
 
44
  Malay evaluation,
45
 
46
  ```
47
- CER: 0.053659683623049854
48
- WER: 0.22565751242221832
49
- CER with LM: 0.036930421149001316
50
- WER with LM: 0.14256712242006359
51
  ```
52
 
53
  Singlish evaluation,
54
 
55
  ```
56
- CER: 0.04174804195104746
57
- WER: 0.10734402150682842
58
- CER with LM: 0.03538238462620066
59
- WER with LM: 0.08103191123663189
60
  ```
61
 
62
  Mandarin evaluation,
63
 
64
  ```
65
- CER: 0.04211892733885779
66
- WER: 0.09817787449869257
67
- CER with LM: 0.040151154521006656
68
- WER with LM: 0.08913415903511501
69
  ```
70
 
71
  Language model from https://huggingface.co/huseinzol05/language-model-bahasa-manglish-combined
 
35
  Mixed evaluation,
36
 
37
  ```
38
+ CER: 0.0481054244857041
39
+ WER: 0.1322198446007387
40
+ CER with LM: 0.041196586938584696
41
+ WER with LM: 0.09880169127621556
42
  ```
43
 
44
  Malay evaluation,
45
 
46
  ```
47
+ CER: 0.051636391937588406
48
+ WER: 0.19561999547293663
49
+ CER with LM: 0.03917689630621449
50
+ WER with LM: 0.12710746406824835
51
  ```
52
 
53
  Singlish evaluation,
54
 
55
  ```
56
+ CER: 0.0494915200071987
57
+ WER: 0.12763802881676573
58
+ CER with LM: 0.04271234986432335
59
+ WER with LM: 0.09677160640413336
60
  ```
61
 
62
  Mandarin evaluation,
63
 
64
  ```
65
+ CER: 0.035626554824269824
66
+ WER: 0.07993515937860181
67
+ CER with LM: 0.03487760945087219
68
+ WER with LM: 0.07536807168546154
69
  ```
70
 
71
  Language model from https://huggingface.co/huseinzol05/language-model-bahasa-manglish-combined
evaluate-gpu.ipynb CHANGED
@@ -158,22 +158,22 @@
158
  {
159
  "data": {
160
  "text/plain": [
161
- "[('malay-test/101.wav',\n",
162
- " 'kenapa jews dan israelis mengejek iranian bukan agama lebih'),\n",
163
- " ('singlish-test/978.wav',\n",
164
- " 'but in the olympics time does not really matter what matters is winning'),\n",
165
- " ('singlish-test/1189.wav',\n",
166
- " 'and if joseph schooling was born with a tinier hand he might not have hit the wall first'),\n",
167
- " ('singlish-test/1774.wav', 'melissa passed her number to the young man'),\n",
168
- " ('singlish-test/21.wav',\n",
169
- " 'but he really wanted to make satay and hung around satay sellers and memorized their ingredients'),\n",
170
- " ('singlish-test/2164.wav', 'just check out their coverage'),\n",
171
- " ('malay-test/397.wav', 'budaya cocorico french culture'),\n",
172
- " ('mandarin-test/359.wav', 'xi shou jian deng da kai'),\n",
173
- " ('singlish-test/1107.wav',\n",
174
- " 'the party had a cool vibe but was for guests only'),\n",
175
- " ('singlish-test/285.wav',\n",
176
- " 'but in smaller towns there are not so many places and that is where they are most needed')]"
177
  ]
178
  },
179
  "execution_count": 8,
@@ -252,7 +252,7 @@
252
  "outputs": [],
253
  "source": [
254
  "model = AutoModelForCTC.from_pretrained(\n",
255
- " './wav2vec2-mixed-v3/checkpoint-97000',\n",
256
  " ctc_loss_reduction=\"mean\",\n",
257
  " pad_token_id=tokenizer.pad_token_id,\n",
258
  " vocab_size=len(tokenizer),\n",
@@ -303,10 +303,10 @@
303
  {
304
  "data": {
305
  "text/plain": [
306
- "['kenapa jius dan israelis mengejik iranian bukan agama lebih',\n",
307
- " 'but in the olympics time does not really matter what matters is winning',\n",
308
- " 'and if joseph schooling was born with a tinier hand he might not have hit the world first',\n",
309
- " 'melissa passed her number to the young man']"
310
  ]
311
  },
312
  "execution_count": 14,
@@ -362,10 +362,10 @@
362
  "name": "stdout",
363
  "output_type": "stream",
364
  "text": [
365
- "0 kenapa jus dan israelis mengejek iranian bukan agama lebih\n",
366
- "1 but in the olympics time does not really matter what matters is winning\n",
367
- "2 and if joseph schooling was born with a tinier hand he might not have hit the world first\n",
368
- "3 melissa passed her number to the young man\n"
369
  ]
370
  }
371
  ],
@@ -385,10 +385,10 @@
385
  {
386
  "data": {
387
  "text/plain": [
388
- "['kenapa jews dan israelis mengejek iranian bukan agama lebih',\n",
389
- " 'but in the olympics time does not really matter what matters is winning',\n",
390
- " 'and if joseph schooling was born with a tinier hand he might not have hit the wall first',\n",
391
- " 'melissa passed her number to the young man']"
392
  ]
393
  },
394
  "execution_count": 18,
@@ -443,7 +443,7 @@
443
  "name": "stderr",
444
  "output_type": "stream",
445
  "text": [
446
- "100%|██████████| 1240/1240 [07:43<00:00, 2.67it/s]\n"
447
  ]
448
  }
449
  ],
@@ -474,27 +474,25 @@
474
  " cer.append(calculate_cer(batch_y[k], pred[k]))\n",
475
  " \n",
476
  " wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
477
- " cer_lm.append(calculate_cer(batch_y[k], d_lm2))\n",
478
- " \n",
479
- " "
480
  ]
481
  },
482
  {
483
  "cell_type": "code",
484
- "execution_count": 26,
485
  "id": "6c6ce8ef",
486
  "metadata": {},
487
  "outputs": [
488
  {
489
  "data": {
490
  "text/plain": [
491
- "(0.12446419219809059,\n",
492
- " 0.04363189219453221,\n",
493
- " 0.09152993800218129,\n",
494
- " 0.03621180629932558)"
495
  ]
496
  },
497
- "execution_count": 26,
498
  "metadata": {},
499
  "output_type": "execute_result"
500
  }
@@ -505,7 +503,7 @@
505
  },
506
  {
507
  "cell_type": "code",
508
- "execution_count": 27,
509
  "id": "cf53914e",
510
  "metadata": {},
511
  "outputs": [],
@@ -517,20 +515,20 @@
517
  },
518
  {
519
  "cell_type": "code",
520
- "execution_count": 28,
521
  "id": "b1558987",
522
  "metadata": {},
523
  "outputs": [
524
  {
525
  "data": {
526
  "text/plain": [
527
- "(0.22565751242221832,\n",
528
- " 0.053659683623049854,\n",
529
- " 0.14256712242006359,\n",
530
- " 0.036930421149001316)"
531
  ]
532
  },
533
- "execution_count": 28,
534
  "metadata": {},
535
  "output_type": "execute_result"
536
  }
@@ -541,20 +539,20 @@
541
  },
542
  {
543
  "cell_type": "code",
544
- "execution_count": 29,
545
  "id": "f340cde7",
546
  "metadata": {},
547
  "outputs": [
548
  {
549
  "data": {
550
  "text/plain": [
551
- "(0.10734402150682842,\n",
552
- " 0.04174804195104746,\n",
553
- " 0.08103191123663189,\n",
554
- " 0.03538238462620066)"
555
  ]
556
  },
557
- "execution_count": 29,
558
  "metadata": {},
559
  "output_type": "execute_result"
560
  }
@@ -565,20 +563,20 @@
565
  },
566
  {
567
  "cell_type": "code",
568
- "execution_count": 30,
569
  "id": "cbc2539f",
570
  "metadata": {},
571
  "outputs": [
572
  {
573
  "data": {
574
  "text/plain": [
575
- "(0.09817787449869257,\n",
576
- " 0.04211892733885779,\n",
577
- " 0.08913415903511501,\n",
578
- " 0.040151154521006656)"
579
  ]
580
  },
581
- "execution_count": 30,
582
  "metadata": {},
583
  "output_type": "execute_result"
584
  }
@@ -589,14 +587,14 @@
589
  },
590
  {
591
  "cell_type": "code",
592
- "execution_count": 31,
593
  "id": "4c543d0c",
594
  "metadata": {},
595
  "outputs": [
596
  {
597
  "data": {
598
  "application/vnd.jupyter.widget-view+json": {
599
- "model_id": "551516109d6a418b95be6884422d853e",
600
  "version_major": 2,
601
  "version_minor": 0
602
  },
@@ -606,27 +604,6 @@
606
  },
607
  "metadata": {},
608
  "output_type": "display_data"
609
- },
610
- {
611
- "name": "stderr",
612
- "output_type": "stream",
613
- "text": [
614
- "remote: Enforcing permissions... \n",
615
- "remote: Allowed refs: all \n",
616
- "To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
617
- " 3f5d181..7799685 main -> main\n",
618
- "\n"
619
- ]
620
- },
621
- {
622
- "data": {
623
- "text/plain": [
624
- "'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/77996855b40213396051061d8e23b67c2616e614'"
625
- ]
626
- },
627
- "execution_count": 31,
628
- "metadata": {},
629
- "output_type": "execute_result"
630
  }
631
  ],
632
  "source": [
@@ -635,42 +612,13 @@
635
  },
636
  {
637
  "cell_type": "code",
638
- "execution_count": 32,
639
  "id": "05ec385e",
640
  "metadata": {},
641
- "outputs": [
642
- {
643
- "name": "stderr",
644
- "output_type": "stream",
645
- "text": [
646
- "2022-06-01 19:14:20.564262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
647
- "2022-06-01 19:14:20.603610: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
648
- "2022-06-01 19:14:20.605395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
649
- "2022-06-01 19:14:20.607506: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n",
650
- "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
651
- "2022-06-01 19:14:20.609495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
652
- "2022-06-01 19:14:20.610833: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
653
- "2022-06-01 19:14:20.612207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
654
- "2022-06-01 19:14:20.615738: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
655
- "2022-06-01 19:14:20.617302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
656
- "2022-06-01 19:14:20.618707: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
657
- "2022-06-01 19:14:20.620281: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
658
- "2022-06-01 19:14:20.620394: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 17119 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6\n",
659
- "\n",
660
- "TFWav2Vec2ForCTC has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tine this model, you need a GPU or a TPU\n",
661
- "2022-06-01 19:14:22.857691: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8100\n",
662
- "2022-06-01 19:14:24.326073: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n",
663
- "2022-06-01 19:14:25.725870: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
664
- "All PyTorch model weights were used when initializing TFWav2Vec2ForCTC.\n",
665
- "\n",
666
- "All the weights of TFWav2Vec2ForCTC were initialized from the PyTorch model.\n",
667
- "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWav2Vec2ForCTC for predictions without further training.\n"
668
- ]
669
- }
670
- ],
671
  "source": [
672
  "model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
673
- " './wav2vec2-mixed-v3/checkpoint-97000',\n",
674
  " ctc_loss_reduction=\"mean\",\n",
675
  " pad_token_id=tokenizer.pad_token_id,\n",
676
  " vocab_size=len(tokenizer),\n",
@@ -680,46 +628,10 @@
680
  },
681
  {
682
  "cell_type": "code",
683
- "execution_count": 33,
684
  "id": "e0f3f749",
685
  "metadata": {},
686
- "outputs": [
687
- {
688
- "data": {
689
- "application/vnd.jupyter.widget-view+json": {
690
- "model_id": "a0e5eeee5bf4499da3d5f4adbd5bfd4f",
691
- "version_major": 2,
692
- "version_minor": 0
693
- },
694
- "text/plain": [
695
- "Upload file tf_model.h5: 0%| | 4.00k/1.18G [00:00<?, ?B/s]"
696
- ]
697
- },
698
- "metadata": {},
699
- "output_type": "display_data"
700
- },
701
- {
702
- "name": "stderr",
703
- "output_type": "stream",
704
- "text": [
705
- "remote: Enforcing permissions... \n",
706
- "remote: Allowed refs: all \n",
707
- "To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
708
- " 7799685..0b9b0fb main -> main\n",
709
- "\n"
710
- ]
711
- },
712
- {
713
- "data": {
714
- "text/plain": [
715
- "'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/0b9b0fb66dc68a4f71ab793274fb28df9f19764f'"
716
- ]
717
- },
718
- "execution_count": 33,
719
- "metadata": {},
720
- "output_type": "execute_result"
721
- }
722
- ],
723
  "source": [
724
  "model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
725
  ]
 
158
  {
159
  "data": {
160
  "text/plain": [
161
+ "[('singlish-test/3057.wav', 'the teenagers paddled hard on their boat'),\n",
162
+ " ('malay-test/705.wav', 'kenapa justin trudeau seperti kemaluan wanita'),\n",
163
+ " ('singlish-test/2631.wav',\n",
164
+ " 'a letter by a mans daughter pleading for leniency was submitted'),\n",
165
+ " ('singlish-test/659.wav', 'and theres thousands of people to meet'),\n",
166
+ " ('singlish-test/809.wav', 'how much lower are the prices'),\n",
167
+ " ('singlish-test/2040.wav',\n",
168
+ " 'suddenly a gun shot was fired from a distance which sent the dogs fleeing in an instant'),\n",
169
+ " ('singlish-test/1616.wav',\n",
170
+ " 'a stronger dollar pressures gold making it more expensive for holders of other currencies'),\n",
171
+ " ('singlish-test/1816.wav',\n",
172
+ " 'family as a priority has become real for me and not just a cliche'),\n",
173
+ " ('malay-test/147.wav',\n",
174
+ " 'adakah anda percaya bahawa donald trump adalah kedatangan kedua jesus christ'),\n",
175
+ " ('singlish-test/3468.wav',\n",
176
+ " 'but much of the technology required for such a fantastic instrument didnt yet exist')]"
177
  ]
178
  },
179
  "execution_count": 8,
 
252
  "outputs": [],
253
  "source": [
254
  "model = AutoModelForCTC.from_pretrained(\n",
255
+ " './checkpoint-115000',\n",
256
  " ctc_loss_reduction=\"mean\",\n",
257
  " pad_token_id=tokenizer.pad_token_id,\n",
258
  " vocab_size=len(tokenizer),\n",
 
303
  {
304
  "data": {
305
  "text/plain": [
306
+ "['the teenagers paddled hard on their boat',\n",
307
+ " 'kenapa justin tradio seperti kemaluan wanita',\n",
308
+ " 'a letter bya mans daughter pleading for lenien te was submitted',\n",
309
+ " 'and theres thousands of people to meet']"
310
  ]
311
  },
312
  "execution_count": 14,
 
362
  "name": "stdout",
363
  "output_type": "stream",
364
  "text": [
365
+ "0 to know more about this years budget click here\n",
366
+ "1 you can bake shortbread cookies just with sugar butter and flour\n",
367
+ "2 all good citizens should learn how to change a light bulb\n",
368
+ "3 as a child madam surley was constantly teased by other children over her appearance\n"
369
  ]
370
  }
371
  ],
 
385
  {
386
  "data": {
387
  "text/plain": [
388
+ "['to know more about this years budget click here',\n",
389
+ " 'you can bake shortbread cookies just with sugar butter and flour',\n",
390
+ " 'all good citizens should learn how to change a light bulb',\n",
391
+ " 'as a child madam shirley was constantly teased by other children over her appearance']"
392
  ]
393
  },
394
  "execution_count": 18,
 
443
  "name": "stderr",
444
  "output_type": "stream",
445
  "text": [
446
+ "100%|██████████| 1240/1240 [04:23<00:00, 4.71it/s]\n"
447
  ]
448
  }
449
  ],
 
474
  " cer.append(calculate_cer(batch_y[k], pred[k]))\n",
475
  " \n",
476
  " wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
477
+ " cer_lm.append(calculate_cer(batch_y[k], d_lm2))"
 
 
478
  ]
479
  },
480
  {
481
  "cell_type": "code",
482
+ "execution_count": 21,
483
  "id": "6c6ce8ef",
484
  "metadata": {},
485
  "outputs": [
486
  {
487
  "data": {
488
  "text/plain": [
489
+ "(0.1322198446007387,\n",
490
+ " 0.0481054244857041,\n",
491
+ " 0.09880169127621556,\n",
492
+ " 0.041196586938584696)"
493
  ]
494
  },
495
+ "execution_count": 21,
496
  "metadata": {},
497
  "output_type": "execute_result"
498
  }
 
503
  },
504
  {
505
  "cell_type": "code",
506
+ "execution_count": 22,
507
  "id": "cf53914e",
508
  "metadata": {},
509
  "outputs": [],
 
515
  },
516
  {
517
  "cell_type": "code",
518
+ "execution_count": 23,
519
  "id": "b1558987",
520
  "metadata": {},
521
  "outputs": [
522
  {
523
  "data": {
524
  "text/plain": [
525
+ "(0.19561999547293663,\n",
526
+ " 0.051636391937588406,\n",
527
+ " 0.12710746406824835,\n",
528
+ " 0.03917689630621449)"
529
  ]
530
  },
531
+ "execution_count": 23,
532
  "metadata": {},
533
  "output_type": "execute_result"
534
  }
 
539
  },
540
  {
541
  "cell_type": "code",
542
+ "execution_count": 24,
543
  "id": "f340cde7",
544
  "metadata": {},
545
  "outputs": [
546
  {
547
  "data": {
548
  "text/plain": [
549
+ "(0.12763802881676573,\n",
550
+ " 0.0494915200071987,\n",
551
+ " 0.09677160640413336,\n",
552
+ " 0.04271234986432335)"
553
  ]
554
  },
555
+ "execution_count": 24,
556
  "metadata": {},
557
  "output_type": "execute_result"
558
  }
 
563
  },
564
  {
565
  "cell_type": "code",
566
+ "execution_count": 26,
567
  "id": "cbc2539f",
568
  "metadata": {},
569
  "outputs": [
570
  {
571
  "data": {
572
  "text/plain": [
573
+ "(0.07993515937860181,\n",
574
+ " 0.035626554824269824,\n",
575
+ " 0.07536807168546154,\n",
576
+ " 0.03487760945087219)"
577
  ]
578
  },
579
+ "execution_count": 26,
580
  "metadata": {},
581
  "output_type": "execute_result"
582
  }
 
587
  },
588
  {
589
  "cell_type": "code",
590
+ "execution_count": null,
591
  "id": "4c543d0c",
592
  "metadata": {},
593
  "outputs": [
594
  {
595
  "data": {
596
  "application/vnd.jupyter.widget-view+json": {
597
+ "model_id": "7270a78ff7874222b18f538069750bc1",
598
  "version_major": 2,
599
  "version_minor": 0
600
  },
 
604
  },
605
  "metadata": {},
606
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  }
608
  ],
609
  "source": [
 
612
  },
613
  {
614
  "cell_type": "code",
615
+ "execution_count": null,
616
  "id": "05ec385e",
617
  "metadata": {},
618
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
  "source": [
620
  "model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
621
+ " './checkpoint-115000',\n",
622
  " ctc_loss_reduction=\"mean\",\n",
623
  " pad_token_id=tokenizer.pad_token_id,\n",
624
  " vocab_size=len(tokenizer),\n",
 
628
  },
629
  {
630
  "cell_type": "code",
631
+ "execution_count": null,
632
  "id": "e0f3f749",
633
  "metadata": {},
634
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  "source": [
636
  "model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
637
  ]