linshoufan committed on
Commit
dfa07db
·
verified ·
1 Parent(s): 4279be4

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8680dced3930b9abcaa16374ccf16e3e11a09f4bff5b13e99058831dbac7136a
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f5cbe7a3b552f05f3563fcd2bc80d6b2d4e9fd013658478fd9a3872e397efe8
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9cacea682da039c7edf44c59f24360dad4d083c610f2f2574a0c14afe12bff0
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d39efa613b798fd03aa7aa59e14790c24a560f58a5ce98fbd4c0ad62ebd17c
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba1ca94b44e4e4a1b4f9b82d10d6019eb9437e3c8e0ac1249502acb8cc9bc1b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa71a0412d6ad7eda1eee97e70ad545f4d60c017a08d85ea9e1f842b586619fa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05d76a81ce579e0f1f2bb68af30a2e95dfeadbbfe77f982467ddb98b43e349b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbab736cb879a4bc39da788157b54992b7cc13715d80715f4b4828876ce1b081
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 35.404720629417255,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-2000",
4
- "epoch": 0.6428801028608164,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -603,6 +603,155 @@
603
  "eval_samples_per_second": 2.461,
604
  "eval_steps_per_second": 0.308,
605
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
  }
607
  ],
608
  "logging_steps": 25,
@@ -610,7 +759,7 @@
610
  "num_input_tokens_seen": 0,
611
  "num_train_epochs": 1,
612
  "save_steps": 500,
613
- "total_flos": 9.23473281024e+18,
614
  "train_batch_size": 16,
615
  "trial_name": null,
616
  "trial_params": null
 
1
  {
2
+ "best_metric": 33.57781037471663,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-2500",
4
+ "epoch": 0.8036001285760206,
5
  "eval_steps": 500,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
603
  "eval_samples_per_second": 2.461,
604
  "eval_steps_per_second": 0.308,
605
  "step": 2000
606
+ },
607
+ {
608
+ "epoch": 0.65,
609
+ "grad_norm": 9.923101425170898,
610
+ "learning_rate": 3.6067751577548985e-06,
611
+ "loss": 0.4361,
612
+ "step": 2025
613
+ },
614
+ {
615
+ "epoch": 0.66,
616
+ "grad_norm": 9.01765251159668,
617
+ "learning_rate": 3.523746263699768e-06,
618
+ "loss": 0.3896,
619
+ "step": 2050
620
+ },
621
+ {
622
+ "epoch": 0.67,
623
+ "grad_norm": 11.23643684387207,
624
+ "learning_rate": 3.4407173696446367e-06,
625
+ "loss": 0.3453,
626
+ "step": 2075
627
+ },
628
+ {
629
+ "epoch": 0.68,
630
+ "grad_norm": 9.193674087524414,
631
+ "learning_rate": 3.3576884755895056e-06,
632
+ "loss": 0.3888,
633
+ "step": 2100
634
+ },
635
+ {
636
+ "epoch": 0.68,
637
+ "grad_norm": 8.438018798828125,
638
+ "learning_rate": 3.274659581534374e-06,
639
+ "loss": 0.3798,
640
+ "step": 2125
641
+ },
642
+ {
643
+ "epoch": 0.69,
644
+ "grad_norm": 9.949082374572754,
645
+ "learning_rate": 3.191630687479243e-06,
646
+ "loss": 0.3828,
647
+ "step": 2150
648
+ },
649
+ {
650
+ "epoch": 0.7,
651
+ "grad_norm": 12.07507610321045,
652
+ "learning_rate": 3.1086017934241117e-06,
653
+ "loss": 0.4027,
654
+ "step": 2175
655
+ },
656
+ {
657
+ "epoch": 0.71,
658
+ "grad_norm": 12.350488662719727,
659
+ "learning_rate": 3.025572899368981e-06,
660
+ "loss": 0.3791,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 0.72,
665
+ "grad_norm": 12.681595802307129,
666
+ "learning_rate": 2.9425440053138495e-06,
667
+ "loss": 0.3863,
668
+ "step": 2225
669
+ },
670
+ {
671
+ "epoch": 0.72,
672
+ "grad_norm": 13.789870262145996,
673
+ "learning_rate": 2.8595151112587184e-06,
674
+ "loss": 0.3688,
675
+ "step": 2250
676
+ },
677
+ {
678
+ "epoch": 0.73,
679
+ "grad_norm": 11.885881423950195,
680
+ "learning_rate": 2.7764862172035872e-06,
681
+ "loss": 0.3661,
682
+ "step": 2275
683
+ },
684
+ {
685
+ "epoch": 0.74,
686
+ "grad_norm": 10.707484245300293,
687
+ "learning_rate": 2.6934573231484557e-06,
688
+ "loss": 0.3731,
689
+ "step": 2300
690
+ },
691
+ {
692
+ "epoch": 0.75,
693
+ "grad_norm": 12.371014595031738,
694
+ "learning_rate": 2.6104284290933245e-06,
695
+ "loss": 0.3651,
696
+ "step": 2325
697
+ },
698
+ {
699
+ "epoch": 0.76,
700
+ "grad_norm": 12.415855407714844,
701
+ "learning_rate": 2.5273995350381934e-06,
702
+ "loss": 0.3529,
703
+ "step": 2350
704
+ },
705
+ {
706
+ "epoch": 0.76,
707
+ "grad_norm": 12.046368598937988,
708
+ "learning_rate": 2.4443706409830623e-06,
709
+ "loss": 0.3565,
710
+ "step": 2375
711
+ },
712
+ {
713
+ "epoch": 0.77,
714
+ "grad_norm": 10.2451810836792,
715
+ "learning_rate": 2.361341746927931e-06,
716
+ "loss": 0.3337,
717
+ "step": 2400
718
+ },
719
+ {
720
+ "epoch": 0.78,
721
+ "grad_norm": 7.761926174163818,
722
+ "learning_rate": 2.2783128528728e-06,
723
+ "loss": 0.3636,
724
+ "step": 2425
725
+ },
726
+ {
727
+ "epoch": 0.79,
728
+ "grad_norm": 9.736420631408691,
729
+ "learning_rate": 2.1952839588176684e-06,
730
+ "loss": 0.346,
731
+ "step": 2450
732
+ },
733
+ {
734
+ "epoch": 0.8,
735
+ "grad_norm": 9.760013580322266,
736
+ "learning_rate": 2.1122550647625377e-06,
737
+ "loss": 0.3535,
738
+ "step": 2475
739
+ },
740
+ {
741
+ "epoch": 0.8,
742
+ "grad_norm": 9.893476486206055,
743
+ "learning_rate": 2.029226170707406e-06,
744
+ "loss": 0.3758,
745
+ "step": 2500
746
+ },
747
+ {
748
+ "epoch": 0.8,
749
+ "eval_cer": 33.57781037471663,
750
+ "eval_loss": 0.43632233142852783,
751
+ "eval_runtime": 1812.7817,
752
+ "eval_samples_per_second": 2.447,
753
+ "eval_steps_per_second": 0.306,
754
+ "step": 2500
755
  }
756
  ],
757
  "logging_steps": 25,
 
759
  "num_input_tokens_seen": 0,
760
  "num_train_epochs": 1,
761
  "save_steps": 500,
762
+ "total_flos": 1.15434160128e+19,
763
  "train_batch_size": 16,
764
  "trial_name": null,
765
  "trial_params": null