linshoufan commited on
Commit
6ced054
·
verified ·
1 Parent(s): a6a629d

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80df9d35d56a8267e8b6930d4a1feab990c862ad5d372dc1cf99f1ebb71fa00
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8680dced3930b9abcaa16374ccf16e3e11a09f4bff5b13e99058831dbac7136a
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f42218cd4fb87045f6ae4434d85dd0960590211fc91c131590301dacdaf970c3
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9cacea682da039c7edf44c59f24360dad4d083c610f2f2574a0c14afe12bff0
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74b353ceab33af34d958a5853dd8446e5dbd5378fd20d13cb24ab0b94e04b08b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba1ca94b44e4e4a1b4f9b82d10d6019eb9437e3c8e0ac1249502acb8cc9bc1b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4020325e91a2750bc47a21538450c88651353213bb907e48726111419ca26f3d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d76a81ce579e0f1f2bb68af30a2e95dfeadbbfe77f982467ddb98b43e349b1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 37.618349113215096,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-1500",
4
- "epoch": 0.4821600771456123,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -454,6 +454,155 @@
454
  "eval_samples_per_second": 2.465,
455
  "eval_steps_per_second": 0.308,
456
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  }
458
  ],
459
  "logging_steps": 25,
@@ -461,7 +610,7 @@
461
  "num_input_tokens_seen": 0,
462
  "num_train_epochs": 1,
463
  "save_steps": 500,
464
- "total_flos": 6.92604960768e+18,
465
  "train_batch_size": 16,
466
  "trial_name": null,
467
  "trial_params": null
 
1
  {
2
+ "best_metric": 35.404720629417255,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-2000",
4
+ "epoch": 0.6428801028608164,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
454
  "eval_samples_per_second": 2.465,
455
  "eval_steps_per_second": 0.308,
456
  "step": 1500
457
+ },
458
+ {
459
+ "epoch": 0.49,
460
+ "grad_norm": 9.844304084777832,
461
+ "learning_rate": 5.267353038857523e-06,
462
+ "loss": 0.5035,
463
+ "step": 1525
464
+ },
465
+ {
466
+ "epoch": 0.5,
467
+ "grad_norm": 11.165616035461426,
468
+ "learning_rate": 5.184324144802392e-06,
469
+ "loss": 0.449,
470
+ "step": 1550
471
+ },
472
+ {
473
+ "epoch": 0.51,
474
+ "grad_norm": 10.43535327911377,
475
+ "learning_rate": 5.10129525074726e-06,
476
+ "loss": 0.4471,
477
+ "step": 1575
478
+ },
479
+ {
480
+ "epoch": 0.51,
481
+ "grad_norm": 9.737510681152344,
482
+ "learning_rate": 5.0182663566921295e-06,
483
+ "loss": 0.4779,
484
+ "step": 1600
485
+ },
486
+ {
487
+ "epoch": 0.52,
488
+ "grad_norm": 10.221022605895996,
489
+ "learning_rate": 4.935237462636998e-06,
490
+ "loss": 0.4266,
491
+ "step": 1625
492
+ },
493
+ {
494
+ "epoch": 0.53,
495
+ "grad_norm": 7.031712532043457,
496
+ "learning_rate": 4.852208568581867e-06,
497
+ "loss": 0.4355,
498
+ "step": 1650
499
+ },
500
+ {
501
+ "epoch": 0.54,
502
+ "grad_norm": 10.13843822479248,
503
+ "learning_rate": 4.769179674526736e-06,
504
+ "loss": 0.4506,
505
+ "step": 1675
506
+ },
507
+ {
508
+ "epoch": 0.55,
509
+ "grad_norm": 14.100777626037598,
510
+ "learning_rate": 4.686150780471604e-06,
511
+ "loss": 0.4484,
512
+ "step": 1700
513
+ },
514
+ {
515
+ "epoch": 0.55,
516
+ "grad_norm": 11.218331336975098,
517
+ "learning_rate": 4.603121886416473e-06,
518
+ "loss": 0.4637,
519
+ "step": 1725
520
+ },
521
+ {
522
+ "epoch": 0.56,
523
+ "grad_norm": 9.891203880310059,
524
+ "learning_rate": 4.520092992361343e-06,
525
+ "loss": 0.4142,
526
+ "step": 1750
527
+ },
528
+ {
529
+ "epoch": 0.57,
530
+ "grad_norm": 9.585916519165039,
531
+ "learning_rate": 4.437064098306211e-06,
532
+ "loss": 0.4202,
533
+ "step": 1775
534
+ },
535
+ {
536
+ "epoch": 0.58,
537
+ "grad_norm": 10.81905460357666,
538
+ "learning_rate": 4.35403520425108e-06,
539
+ "loss": 0.4459,
540
+ "step": 1800
541
+ },
542
+ {
543
+ "epoch": 0.59,
544
+ "grad_norm": 13.257423400878906,
545
+ "learning_rate": 4.271006310195949e-06,
546
+ "loss": 0.448,
547
+ "step": 1825
548
+ },
549
+ {
550
+ "epoch": 0.59,
551
+ "grad_norm": 9.057276725769043,
552
+ "learning_rate": 4.187977416140817e-06,
553
+ "loss": 0.4043,
554
+ "step": 1850
555
+ },
556
+ {
557
+ "epoch": 0.6,
558
+ "grad_norm": 11.002601623535156,
559
+ "learning_rate": 4.104948522085686e-06,
560
+ "loss": 0.4011,
561
+ "step": 1875
562
+ },
563
+ {
564
+ "epoch": 0.61,
565
+ "grad_norm": 15.421494483947754,
566
+ "learning_rate": 4.021919628030555e-06,
567
+ "loss": 0.4208,
568
+ "step": 1900
569
+ },
570
+ {
571
+ "epoch": 0.62,
572
+ "grad_norm": 12.186066627502441,
573
+ "learning_rate": 3.938890733975424e-06,
574
+ "loss": 0.389,
575
+ "step": 1925
576
+ },
577
+ {
578
+ "epoch": 0.63,
579
+ "grad_norm": 8.680899620056152,
580
+ "learning_rate": 3.855861839920293e-06,
581
+ "loss": 0.4189,
582
+ "step": 1950
583
+ },
584
+ {
585
+ "epoch": 0.63,
586
+ "grad_norm": 10.597740173339844,
587
+ "learning_rate": 3.7728329458651612e-06,
588
+ "loss": 0.3654,
589
+ "step": 1975
590
+ },
591
+ {
592
+ "epoch": 0.64,
593
+ "grad_norm": 10.675308227539062,
594
+ "learning_rate": 3.6898040518100305e-06,
595
+ "loss": 0.3512,
596
+ "step": 2000
597
+ },
598
+ {
599
+ "epoch": 0.64,
600
+ "eval_cer": 35.404720629417255,
601
+ "eval_loss": 0.4709227383136749,
602
+ "eval_runtime": 1802.3985,
603
+ "eval_samples_per_second": 2.461,
604
+ "eval_steps_per_second": 0.308,
605
+ "step": 2000
606
  }
607
  ],
608
  "logging_steps": 25,
 
610
  "num_input_tokens_seen": 0,
611
  "num_train_epochs": 1,
612
  "save_steps": 500,
613
+ "total_flos": 9.23473281024e+18,
614
  "train_batch_size": 16,
615
  "trial_name": null,
616
  "trial_params": null