huseinzol05
committed on
Commit
·
d154450
1
Parent(s):
579e67e
Upload evaluate-gpu.ipynb
Browse files- evaluate-gpu.ipynb +63 -151
evaluate-gpu.ipynb
CHANGED
@@ -158,22 +158,22 @@
|
|
158 |
{
|
159 |
"data": {
|
160 |
"text/plain": [
|
161 |
-
"[('
|
162 |
-
"
|
163 |
-
" ('singlish-test/
|
164 |
-
" '
|
165 |
-
" ('singlish-test/
|
166 |
-
"
|
167 |
-
" ('singlish-test/
|
168 |
-
"
|
169 |
-
"
|
170 |
-
"
|
171 |
-
" ('
|
172 |
-
"
|
173 |
-
" ('
|
174 |
-
" '
|
175 |
-
" ('singlish-test/
|
176 |
-
" 'but
|
177 |
]
|
178 |
},
|
179 |
"execution_count": 8,
|
@@ -252,7 +252,7 @@
|
|
252 |
"outputs": [],
|
253 |
"source": [
|
254 |
"model = AutoModelForCTC.from_pretrained(\n",
|
255 |
-
" './
|
256 |
" ctc_loss_reduction=\"mean\",\n",
|
257 |
" pad_token_id=tokenizer.pad_token_id,\n",
|
258 |
" vocab_size=len(tokenizer),\n",
|
@@ -303,10 +303,10 @@
|
|
303 |
{
|
304 |
"data": {
|
305 |
"text/plain": [
|
306 |
-
"['
|
307 |
-
" '
|
308 |
-
" '
|
309 |
-
" '
|
310 |
]
|
311 |
},
|
312 |
"execution_count": 14,
|
@@ -362,10 +362,10 @@
|
|
362 |
"name": "stdout",
|
363 |
"output_type": "stream",
|
364 |
"text": [
|
365 |
-
"0
|
366 |
-
"1
|
367 |
-
"2
|
368 |
-
"3
|
369 |
]
|
370 |
}
|
371 |
],
|
@@ -385,10 +385,10 @@
|
|
385 |
{
|
386 |
"data": {
|
387 |
"text/plain": [
|
388 |
-
"['
|
389 |
-
" '
|
390 |
-
" '
|
391 |
-
" '
|
392 |
]
|
393 |
},
|
394 |
"execution_count": 18,
|
@@ -443,7 +443,7 @@
|
|
443 |
"name": "stderr",
|
444 |
"output_type": "stream",
|
445 |
"text": [
|
446 |
-
"100
|
447 |
]
|
448 |
}
|
449 |
],
|
@@ -474,27 +474,25 @@
|
|
474 |
" cer.append(calculate_cer(batch_y[k], pred[k]))\n",
|
475 |
" \n",
|
476 |
" wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
|
477 |
-
" cer_lm.append(calculate_cer(batch_y[k], d_lm2))
|
478 |
-
" \n",
|
479 |
-
" "
|
480 |
]
|
481 |
},
|
482 |
{
|
483 |
"cell_type": "code",
|
484 |
-
"execution_count":
|
485 |
"id": "6c6ce8ef",
|
486 |
"metadata": {},
|
487 |
"outputs": [
|
488 |
{
|
489 |
"data": {
|
490 |
"text/plain": [
|
491 |
-
"(0.
|
492 |
-
" 0.
|
493 |
-
" 0.
|
494 |
-
" 0.
|
495 |
]
|
496 |
},
|
497 |
-
"execution_count":
|
498 |
"metadata": {},
|
499 |
"output_type": "execute_result"
|
500 |
}
|
@@ -505,7 +503,7 @@
|
|
505 |
},
|
506 |
{
|
507 |
"cell_type": "code",
|
508 |
-
"execution_count":
|
509 |
"id": "cf53914e",
|
510 |
"metadata": {},
|
511 |
"outputs": [],
|
@@ -517,20 +515,20 @@
|
|
517 |
},
|
518 |
{
|
519 |
"cell_type": "code",
|
520 |
-
"execution_count":
|
521 |
"id": "b1558987",
|
522 |
"metadata": {},
|
523 |
"outputs": [
|
524 |
{
|
525 |
"data": {
|
526 |
"text/plain": [
|
527 |
-
"(0.
|
528 |
-
" 0.
|
529 |
-
" 0.
|
530 |
-
" 0.
|
531 |
]
|
532 |
},
|
533 |
-
"execution_count":
|
534 |
"metadata": {},
|
535 |
"output_type": "execute_result"
|
536 |
}
|
@@ -541,20 +539,20 @@
|
|
541 |
},
|
542 |
{
|
543 |
"cell_type": "code",
|
544 |
-
"execution_count":
|
545 |
"id": "f340cde7",
|
546 |
"metadata": {},
|
547 |
"outputs": [
|
548 |
{
|
549 |
"data": {
|
550 |
"text/plain": [
|
551 |
-
"(0.
|
552 |
-
" 0.
|
553 |
-
" 0.
|
554 |
-
" 0.
|
555 |
]
|
556 |
},
|
557 |
-
"execution_count":
|
558 |
"metadata": {},
|
559 |
"output_type": "execute_result"
|
560 |
}
|
@@ -565,20 +563,20 @@
|
|
565 |
},
|
566 |
{
|
567 |
"cell_type": "code",
|
568 |
-
"execution_count":
|
569 |
"id": "cbc2539f",
|
570 |
"metadata": {},
|
571 |
"outputs": [
|
572 |
{
|
573 |
"data": {
|
574 |
"text/plain": [
|
575 |
-
"(0.
|
576 |
-
" 0.
|
577 |
-
" 0.
|
578 |
-
" 0.
|
579 |
]
|
580 |
},
|
581 |
-
"execution_count":
|
582 |
"metadata": {},
|
583 |
"output_type": "execute_result"
|
584 |
}
|
@@ -589,14 +587,14 @@
|
|
589 |
},
|
590 |
{
|
591 |
"cell_type": "code",
|
592 |
-
"execution_count":
|
593 |
"id": "4c543d0c",
|
594 |
"metadata": {},
|
595 |
"outputs": [
|
596 |
{
|
597 |
"data": {
|
598 |
"application/vnd.jupyter.widget-view+json": {
|
599 |
-
"model_id": "
|
600 |
"version_major": 2,
|
601 |
"version_minor": 0
|
602 |
},
|
@@ -606,27 +604,6 @@
|
|
606 |
},
|
607 |
"metadata": {},
|
608 |
"output_type": "display_data"
|
609 |
-
},
|
610 |
-
{
|
611 |
-
"name": "stderr",
|
612 |
-
"output_type": "stream",
|
613 |
-
"text": [
|
614 |
-
"remote: Enforcing permissions... \n",
|
615 |
-
"remote: Allowed refs: all \n",
|
616 |
-
"To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
|
617 |
-
" 3f5d181..7799685 main -> main\n",
|
618 |
-
"\n"
|
619 |
-
]
|
620 |
-
},
|
621 |
-
{
|
622 |
-
"data": {
|
623 |
-
"text/plain": [
|
624 |
-
"'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/77996855b40213396051061d8e23b67c2616e614'"
|
625 |
-
]
|
626 |
-
},
|
627 |
-
"execution_count": 31,
|
628 |
-
"metadata": {},
|
629 |
-
"output_type": "execute_result"
|
630 |
}
|
631 |
],
|
632 |
"source": [
|
@@ -635,42 +612,13 @@
|
|
635 |
},
|
636 |
{
|
637 |
"cell_type": "code",
|
638 |
-
"execution_count":
|
639 |
"id": "05ec385e",
|
640 |
"metadata": {},
|
641 |
-
"outputs": [
|
642 |
-
{
|
643 |
-
"name": "stderr",
|
644 |
-
"output_type": "stream",
|
645 |
-
"text": [
|
646 |
-
"2022-06-01 19:14:20.564262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
647 |
-
"2022-06-01 19:14:20.603610: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
648 |
-
"2022-06-01 19:14:20.605395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
649 |
-
"2022-06-01 19:14:20.607506: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n",
|
650 |
-
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
651 |
-
"2022-06-01 19:14:20.609495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
652 |
-
"2022-06-01 19:14:20.610833: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
653 |
-
"2022-06-01 19:14:20.612207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
654 |
-
"2022-06-01 19:14:20.615738: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
655 |
-
"2022-06-01 19:14:20.617302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
656 |
-
"2022-06-01 19:14:20.618707: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
|
657 |
-
"2022-06-01 19:14:20.620281: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
|
658 |
-
"2022-06-01 19:14:20.620394: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 17119 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6\n",
|
659 |
-
"\n",
|
660 |
-
"TFWav2Vec2ForCTC has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tine this model, you need a GPU or a TPU\n",
|
661 |
-
"2022-06-01 19:14:22.857691: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8100\n",
|
662 |
-
"2022-06-01 19:14:24.326073: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n",
|
663 |
-
"2022-06-01 19:14:25.725870: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
|
664 |
-
"All PyTorch model weights were used when initializing TFWav2Vec2ForCTC.\n",
|
665 |
-
"\n",
|
666 |
-
"All the weights of TFWav2Vec2ForCTC were initialized from the PyTorch model.\n",
|
667 |
-
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFWav2Vec2ForCTC for predictions without further training.\n"
|
668 |
-
]
|
669 |
-
}
|
670 |
-
],
|
671 |
"source": [
|
672 |
"model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
|
673 |
-
" './
|
674 |
" ctc_loss_reduction=\"mean\",\n",
|
675 |
" pad_token_id=tokenizer.pad_token_id,\n",
|
676 |
" vocab_size=len(tokenizer),\n",
|
@@ -680,46 +628,10 @@
|
|
680 |
},
|
681 |
{
|
682 |
"cell_type": "code",
|
683 |
-
"execution_count":
|
684 |
"id": "e0f3f749",
|
685 |
"metadata": {},
|
686 |
-
"outputs": [
|
687 |
-
{
|
688 |
-
"data": {
|
689 |
-
"application/vnd.jupyter.widget-view+json": {
|
690 |
-
"model_id": "a0e5eeee5bf4499da3d5f4adbd5bfd4f",
|
691 |
-
"version_major": 2,
|
692 |
-
"version_minor": 0
|
693 |
-
},
|
694 |
-
"text/plain": [
|
695 |
-
"Upload file tf_model.h5: 0%| | 4.00k/1.18G [00:00<?, ?B/s]"
|
696 |
-
]
|
697 |
-
},
|
698 |
-
"metadata": {},
|
699 |
-
"output_type": "display_data"
|
700 |
-
},
|
701 |
-
{
|
702 |
-
"name": "stderr",
|
703 |
-
"output_type": "stream",
|
704 |
-
"text": [
|
705 |
-
"remote: Enforcing permissions... \n",
|
706 |
-
"remote: Allowed refs: all \n",
|
707 |
-
"To https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed\n",
|
708 |
-
" 7799685..0b9b0fb main -> main\n",
|
709 |
-
"\n"
|
710 |
-
]
|
711 |
-
},
|
712 |
-
{
|
713 |
-
"data": {
|
714 |
-
"text/plain": [
|
715 |
-
"'https://huggingface.co/mesolitica/wav2vec2-xls-r-300m-mixed/commit/0b9b0fb66dc68a4f71ab793274fb28df9f19764f'"
|
716 |
-
]
|
717 |
-
},
|
718 |
-
"execution_count": 33,
|
719 |
-
"metadata": {},
|
720 |
-
"output_type": "execute_result"
|
721 |
-
}
|
722 |
-
],
|
723 |
"source": [
|
724 |
"model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
|
725 |
]
|
|
|
158 |
{
|
159 |
"data": {
|
160 |
"text/plain": [
|
161 |
+
"[('singlish-test/3057.wav', 'the teenagers paddled hard on their boat'),\n",
|
162 |
+
" ('malay-test/705.wav', 'kenapa justin trudeau seperti kemaluan wanita'),\n",
|
163 |
+
" ('singlish-test/2631.wav',\n",
|
164 |
+
" 'a letter by a mans daughter pleading for leniency was submitted'),\n",
|
165 |
+
" ('singlish-test/659.wav', 'and theres thousands of people to meet'),\n",
|
166 |
+
" ('singlish-test/809.wav', 'how much lower are the prices'),\n",
|
167 |
+
" ('singlish-test/2040.wav',\n",
|
168 |
+
" 'suddenly a gun shot was fired from a distance which sent the dogs fleeing in an instant'),\n",
|
169 |
+
" ('singlish-test/1616.wav',\n",
|
170 |
+
" 'a stronger dollar pressures gold making it more expensive for holders of other currencies'),\n",
|
171 |
+
" ('singlish-test/1816.wav',\n",
|
172 |
+
" 'family as a priority has become real for me and not just a cliche'),\n",
|
173 |
+
" ('malay-test/147.wav',\n",
|
174 |
+
" 'adakah anda percaya bahawa donald trump adalah kedatangan kedua jesus christ'),\n",
|
175 |
+
" ('singlish-test/3468.wav',\n",
|
176 |
+
" 'but much of the technology required for such a fantastic instrument didnt yet exist')]"
|
177 |
]
|
178 |
},
|
179 |
"execution_count": 8,
|
|
|
252 |
"outputs": [],
|
253 |
"source": [
|
254 |
"model = AutoModelForCTC.from_pretrained(\n",
|
255 |
+
" './checkpoint-115000',\n",
|
256 |
" ctc_loss_reduction=\"mean\",\n",
|
257 |
" pad_token_id=tokenizer.pad_token_id,\n",
|
258 |
" vocab_size=len(tokenizer),\n",
|
|
|
303 |
{
|
304 |
"data": {
|
305 |
"text/plain": [
|
306 |
+
"['the teenagers paddled hard on their boat',\n",
|
307 |
+
" 'kenapa justin tradio seperti kemaluan wanita',\n",
|
308 |
+
" 'a letter bya mans daughter pleading for lenien te was submitted',\n",
|
309 |
+
" 'and theres thousands of people to meet']"
|
310 |
]
|
311 |
},
|
312 |
"execution_count": 14,
|
|
|
362 |
"name": "stdout",
|
363 |
"output_type": "stream",
|
364 |
"text": [
|
365 |
+
"0 to know more about this years budget click here\n",
|
366 |
+
"1 you can bake shortbread cookies just with sugar butter and flour\n",
|
367 |
+
"2 all good citizens should learn how to change a light bulb\n",
|
368 |
+
"3 as a child madam surley was constantly teased by other children over her appearance\n"
|
369 |
]
|
370 |
}
|
371 |
],
|
|
|
385 |
{
|
386 |
"data": {
|
387 |
"text/plain": [
|
388 |
+
"['to know more about this years budget click here',\n",
|
389 |
+
" 'you can bake shortbread cookies just with sugar butter and flour',\n",
|
390 |
+
" 'all good citizens should learn how to change a light bulb',\n",
|
391 |
+
" 'as a child madam shirley was constantly teased by other children over her appearance']"
|
392 |
]
|
393 |
},
|
394 |
"execution_count": 18,
|
|
|
443 |
"name": "stderr",
|
444 |
"output_type": "stream",
|
445 |
"text": [
|
446 |
+
"100%|██████████| 1240/1240 [04:23<00:00, 4.71it/s]\n"
|
447 |
]
|
448 |
}
|
449 |
],
|
|
|
474 |
" cer.append(calculate_cer(batch_y[k], pred[k]))\n",
|
475 |
" \n",
|
476 |
" wer_lm.append(calculate_wer(batch_y[k], d_lm2))\n",
|
477 |
+
" cer_lm.append(calculate_cer(batch_y[k], d_lm2))"
|
|
|
|
|
478 |
]
|
479 |
},
|
480 |
{
|
481 |
"cell_type": "code",
|
482 |
+
"execution_count": 21,
|
483 |
"id": "6c6ce8ef",
|
484 |
"metadata": {},
|
485 |
"outputs": [
|
486 |
{
|
487 |
"data": {
|
488 |
"text/plain": [
|
489 |
+
"(0.1322198446007387,\n",
|
490 |
+
" 0.0481054244857041,\n",
|
491 |
+
" 0.09880169127621556,\n",
|
492 |
+
" 0.041196586938584696)"
|
493 |
]
|
494 |
},
|
495 |
+
"execution_count": 21,
|
496 |
"metadata": {},
|
497 |
"output_type": "execute_result"
|
498 |
}
|
|
|
503 |
},
|
504 |
{
|
505 |
"cell_type": "code",
|
506 |
+
"execution_count": 22,
|
507 |
"id": "cf53914e",
|
508 |
"metadata": {},
|
509 |
"outputs": [],
|
|
|
515 |
},
|
516 |
{
|
517 |
"cell_type": "code",
|
518 |
+
"execution_count": 23,
|
519 |
"id": "b1558987",
|
520 |
"metadata": {},
|
521 |
"outputs": [
|
522 |
{
|
523 |
"data": {
|
524 |
"text/plain": [
|
525 |
+
"(0.19561999547293663,\n",
|
526 |
+
" 0.051636391937588406,\n",
|
527 |
+
" 0.12710746406824835,\n",
|
528 |
+
" 0.03917689630621449)"
|
529 |
]
|
530 |
},
|
531 |
+
"execution_count": 23,
|
532 |
"metadata": {},
|
533 |
"output_type": "execute_result"
|
534 |
}
|
|
|
539 |
},
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
+
"execution_count": 24,
|
543 |
"id": "f340cde7",
|
544 |
"metadata": {},
|
545 |
"outputs": [
|
546 |
{
|
547 |
"data": {
|
548 |
"text/plain": [
|
549 |
+
"(0.12763802881676573,\n",
|
550 |
+
" 0.0494915200071987,\n",
|
551 |
+
" 0.09677160640413336,\n",
|
552 |
+
" 0.04271234986432335)"
|
553 |
]
|
554 |
},
|
555 |
+
"execution_count": 24,
|
556 |
"metadata": {},
|
557 |
"output_type": "execute_result"
|
558 |
}
|
|
|
563 |
},
|
564 |
{
|
565 |
"cell_type": "code",
|
566 |
+
"execution_count": 26,
|
567 |
"id": "cbc2539f",
|
568 |
"metadata": {},
|
569 |
"outputs": [
|
570 |
{
|
571 |
"data": {
|
572 |
"text/plain": [
|
573 |
+
"(0.07993515937860181,\n",
|
574 |
+
" 0.035626554824269824,\n",
|
575 |
+
" 0.07536807168546154,\n",
|
576 |
+
" 0.03487760945087219)"
|
577 |
]
|
578 |
},
|
579 |
+
"execution_count": 26,
|
580 |
"metadata": {},
|
581 |
"output_type": "execute_result"
|
582 |
}
|
|
|
587 |
},
|
588 |
{
|
589 |
"cell_type": "code",
|
590 |
+
"execution_count": null,
|
591 |
"id": "4c543d0c",
|
592 |
"metadata": {},
|
593 |
"outputs": [
|
594 |
{
|
595 |
"data": {
|
596 |
"application/vnd.jupyter.widget-view+json": {
|
597 |
+
"model_id": "7270a78ff7874222b18f538069750bc1",
|
598 |
"version_major": 2,
|
599 |
"version_minor": 0
|
600 |
},
|
|
|
604 |
},
|
605 |
"metadata": {},
|
606 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
}
|
608 |
],
|
609 |
"source": [
|
|
|
612 |
},
|
613 |
{
|
614 |
"cell_type": "code",
|
615 |
+
"execution_count": null,
|
616 |
"id": "05ec385e",
|
617 |
"metadata": {},
|
618 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
"source": [
|
620 |
"model_tf = TFWav2Vec2ForCTC.from_pretrained(\n",
|
621 |
+
" './checkpoint-115000',\n",
|
622 |
" ctc_loss_reduction=\"mean\",\n",
|
623 |
" pad_token_id=tokenizer.pad_token_id,\n",
|
624 |
" vocab_size=len(tokenizer),\n",
|
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
+
"execution_count": null,
|
632 |
"id": "e0f3f749",
|
633 |
"metadata": {},
|
634 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
"source": [
|
636 |
"model_tf.push_to_hub('wav2vec2-xls-r-300m-mixed', organization='mesolitica')"
|
637 |
]
|