emilios committed on
Commit
a933910
·
1 Parent(s): c044832

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 587.82,
3
  "eval_loss": 0.56689453125,
4
- "eval_runtime": 154.2304,
5
- "eval_samples_per_second": 1.764,
6
- "eval_steps_per_second": 0.11,
7
  "eval_wer": 9.899702823179792,
8
- "train_loss": 1.4810419082641601e-05,
9
- "train_runtime": 3866.6425,
10
- "train_samples_per_second": 82.759,
11
- "train_steps_per_second": 2.586
12
  }
 
1
  {
2
+ "epoch": 646.82,
3
  "eval_loss": 0.56689453125,
4
+ "eval_runtime": 153.5566,
5
+ "eval_samples_per_second": 1.771,
6
+ "eval_steps_per_second": 0.111,
7
  "eval_wer": 9.899702823179792,
8
+ "train_loss": 1.3152035799893466e-05,
9
+ "train_runtime": 3841.6705,
10
+ "train_samples_per_second": 91.627,
11
+ "train_steps_per_second": 2.863
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 587.82,
3
  "eval_loss": 0.56689453125,
4
- "eval_runtime": 154.2304,
5
- "eval_samples_per_second": 1.764,
6
- "eval_steps_per_second": 0.11,
7
  "eval_wer": 9.899702823179792
8
  }
 
1
  {
2
+ "epoch": 646.82,
3
  "eval_loss": 0.56689453125,
4
+ "eval_runtime": 153.5566,
5
+ "eval_samples_per_second": 1.771,
6
+ "eval_steps_per_second": 0.111,
7
  "eval_wer": 9.899702823179792
8
  }
runs/Dec23_11-21-18_129-146-176-120/events.out.tfevents.1671798661.129-146-176-120.862675.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3384bedae216f607575a8ac5d62e171cd6060d79d5164dce041c5d86b8a4129
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 587.82,
3
- "train_loss": 1.4810419082641601e-05,
4
- "train_runtime": 3866.6425,
5
- "train_samples_per_second": 82.759,
6
- "train_steps_per_second": 2.586
7
  }
 
1
  {
2
+ "epoch": 646.82,
3
+ "train_loss": 1.3152035799893466e-05,
4
+ "train_runtime": 3841.6705,
5
+ "train_samples_per_second": 91.627,
6
+ "train_steps_per_second": 2.863
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 9.899702823179792,
3
  "best_model_checkpoint": "./checkpoint-8000",
4
- "epoch": 587.8235294117648,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2498,18 +2498,267 @@
2498
  "step": 10000
2499
  },
2500
  {
2501
- "epoch": 587.82,
2502
- "step": 10000,
2503
- "total_flos": 3.080532552348504e+20,
2504
- "train_loss": 1.4810419082641601e-05,
2505
- "train_runtime": 3866.6425,
2506
- "train_samples_per_second": 82.759,
2507
- "train_steps_per_second": 2.586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2508
  }
2509
  ],
2510
- "max_steps": 10000,
2511
- "num_train_epochs": 589,
2512
- "total_flos": 3.080532552348504e+20,
2513
  "trial_name": null,
2514
  "trial_params": null
2515
  }
 
1
  {
2
  "best_metric": 9.899702823179792,
3
  "best_model_checkpoint": "./checkpoint-8000",
4
+ "epoch": 646.8235294117648,
5
+ "global_step": 11000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2498
  "step": 10000
2499
  },
2500
  {
2501
+ "epoch": 589.47,
2502
+ "learning_rate": 2.8514285714285715e-07,
2503
+ "loss": 0.0002,
2504
+ "step": 10025
2505
+ },
2506
+ {
2507
+ "epoch": 590.94,
2508
+ "learning_rate": 2.78e-07,
2509
+ "loss": 0.0002,
2510
+ "step": 10050
2511
+ },
2512
+ {
2513
+ "epoch": 592.41,
2514
+ "learning_rate": 2.7085714285714285e-07,
2515
+ "loss": 0.0001,
2516
+ "step": 10075
2517
+ },
2518
+ {
2519
+ "epoch": 593.88,
2520
+ "learning_rate": 2.6371428571428574e-07,
2521
+ "loss": 0.0001,
2522
+ "step": 10100
2523
+ },
2524
+ {
2525
+ "epoch": 595.35,
2526
+ "learning_rate": 2.565714285714286e-07,
2527
+ "loss": 0.0001,
2528
+ "step": 10125
2529
+ },
2530
+ {
2531
+ "epoch": 596.82,
2532
+ "learning_rate": 2.4942857142857144e-07,
2533
+ "loss": 0.0001,
2534
+ "step": 10150
2535
+ },
2536
+ {
2537
+ "epoch": 598.29,
2538
+ "learning_rate": 2.4228571428571427e-07,
2539
+ "loss": 0.0002,
2540
+ "step": 10175
2541
+ },
2542
+ {
2543
+ "epoch": 599.76,
2544
+ "learning_rate": 2.3514285714285715e-07,
2545
+ "loss": 0.0002,
2546
+ "step": 10200
2547
+ },
2548
+ {
2549
+ "epoch": 601.24,
2550
+ "learning_rate": 2.28e-07,
2551
+ "loss": 0.0001,
2552
+ "step": 10225
2553
+ },
2554
+ {
2555
+ "epoch": 602.71,
2556
+ "learning_rate": 2.2085714285714288e-07,
2557
+ "loss": 0.0001,
2558
+ "step": 10250
2559
+ },
2560
+ {
2561
+ "epoch": 604.18,
2562
+ "learning_rate": 2.137142857142857e-07,
2563
+ "loss": 0.0001,
2564
+ "step": 10275
2565
+ },
2566
+ {
2567
+ "epoch": 605.65,
2568
+ "learning_rate": 2.0657142857142856e-07,
2569
+ "loss": 0.0001,
2570
+ "step": 10300
2571
+ },
2572
+ {
2573
+ "epoch": 607.12,
2574
+ "learning_rate": 1.9942857142857144e-07,
2575
+ "loss": 0.0001,
2576
+ "step": 10325
2577
+ },
2578
+ {
2579
+ "epoch": 608.59,
2580
+ "learning_rate": 1.922857142857143e-07,
2581
+ "loss": 0.0001,
2582
+ "step": 10350
2583
+ },
2584
+ {
2585
+ "epoch": 610.06,
2586
+ "learning_rate": 1.8514285714285715e-07,
2587
+ "loss": 0.0002,
2588
+ "step": 10375
2589
+ },
2590
+ {
2591
+ "epoch": 611.53,
2592
+ "learning_rate": 1.78e-07,
2593
+ "loss": 0.0001,
2594
+ "step": 10400
2595
+ },
2596
+ {
2597
+ "epoch": 613.0,
2598
+ "learning_rate": 1.7085714285714286e-07,
2599
+ "loss": 0.0003,
2600
+ "step": 10425
2601
+ },
2602
+ {
2603
+ "epoch": 614.47,
2604
+ "learning_rate": 1.637142857142857e-07,
2605
+ "loss": 0.0002,
2606
+ "step": 10450
2607
+ },
2608
+ {
2609
+ "epoch": 615.94,
2610
+ "learning_rate": 1.5657142857142857e-07,
2611
+ "loss": 0.0001,
2612
+ "step": 10475
2613
+ },
2614
+ {
2615
+ "epoch": 617.41,
2616
+ "learning_rate": 1.4942857142857145e-07,
2617
+ "loss": 0.0001,
2618
+ "step": 10500
2619
+ },
2620
+ {
2621
+ "epoch": 618.88,
2622
+ "learning_rate": 1.422857142857143e-07,
2623
+ "loss": 0.0002,
2624
+ "step": 10525
2625
+ },
2626
+ {
2627
+ "epoch": 620.35,
2628
+ "learning_rate": 1.3514285714285713e-07,
2629
+ "loss": 0.0001,
2630
+ "step": 10550
2631
+ },
2632
+ {
2633
+ "epoch": 621.82,
2634
+ "learning_rate": 1.28e-07,
2635
+ "loss": 0.0002,
2636
+ "step": 10575
2637
+ },
2638
+ {
2639
+ "epoch": 623.29,
2640
+ "learning_rate": 1.2085714285714286e-07,
2641
+ "loss": 0.0002,
2642
+ "step": 10600
2643
+ },
2644
+ {
2645
+ "epoch": 624.76,
2646
+ "learning_rate": 1.1371428571428573e-07,
2647
+ "loss": 0.0002,
2648
+ "step": 10625
2649
+ },
2650
+ {
2651
+ "epoch": 626.24,
2652
+ "learning_rate": 1.0657142857142858e-07,
2653
+ "loss": 0.0002,
2654
+ "step": 10650
2655
+ },
2656
+ {
2657
+ "epoch": 627.71,
2658
+ "learning_rate": 9.942857142857142e-08,
2659
+ "loss": 0.0002,
2660
+ "step": 10675
2661
+ },
2662
+ {
2663
+ "epoch": 629.18,
2664
+ "learning_rate": 9.228571428571429e-08,
2665
+ "loss": 0.0001,
2666
+ "step": 10700
2667
+ },
2668
+ {
2669
+ "epoch": 630.65,
2670
+ "learning_rate": 8.514285714285714e-08,
2671
+ "loss": 0.0001,
2672
+ "step": 10725
2673
+ },
2674
+ {
2675
+ "epoch": 632.12,
2676
+ "learning_rate": 7.8e-08,
2677
+ "loss": 0.0001,
2678
+ "step": 10750
2679
+ },
2680
+ {
2681
+ "epoch": 633.59,
2682
+ "learning_rate": 7.085714285714286e-08,
2683
+ "loss": 0.0002,
2684
+ "step": 10775
2685
+ },
2686
+ {
2687
+ "epoch": 635.06,
2688
+ "learning_rate": 6.371428571428572e-08,
2689
+ "loss": 0.0001,
2690
+ "step": 10800
2691
+ },
2692
+ {
2693
+ "epoch": 636.53,
2694
+ "learning_rate": 5.657142857142857e-08,
2695
+ "loss": 0.0001,
2696
+ "step": 10825
2697
+ },
2698
+ {
2699
+ "epoch": 638.0,
2700
+ "learning_rate": 4.942857142857143e-08,
2701
+ "loss": 0.0001,
2702
+ "step": 10850
2703
+ },
2704
+ {
2705
+ "epoch": 639.47,
2706
+ "learning_rate": 4.2285714285714285e-08,
2707
+ "loss": 0.0002,
2708
+ "step": 10875
2709
+ },
2710
+ {
2711
+ "epoch": 640.94,
2712
+ "learning_rate": 3.514285714285714e-08,
2713
+ "loss": 0.0001,
2714
+ "step": 10900
2715
+ },
2716
+ {
2717
+ "epoch": 642.41,
2718
+ "learning_rate": 2.8000000000000003e-08,
2719
+ "loss": 0.0001,
2720
+ "step": 10925
2721
+ },
2722
+ {
2723
+ "epoch": 643.88,
2724
+ "learning_rate": 2.0857142857142856e-08,
2725
+ "loss": 0.0001,
2726
+ "step": 10950
2727
+ },
2728
+ {
2729
+ "epoch": 645.35,
2730
+ "learning_rate": 1.3714285714285715e-08,
2731
+ "loss": 0.0001,
2732
+ "step": 10975
2733
+ },
2734
+ {
2735
+ "epoch": 646.82,
2736
+ "learning_rate": 6.571428571428572e-09,
2737
+ "loss": 0.0003,
2738
+ "step": 11000
2739
+ },
2740
+ {
2741
+ "epoch": 646.82,
2742
+ "eval_loss": 0.56689453125,
2743
+ "eval_runtime": 153.4429,
2744
+ "eval_samples_per_second": 1.773,
2745
+ "eval_steps_per_second": 0.111,
2746
+ "eval_wer": 9.946136701337295,
2747
+ "step": 11000
2748
+ },
2749
+ {
2750
+ "epoch": 646.82,
2751
+ "step": 11000,
2752
+ "total_flos": 3.388775639915157e+20,
2753
+ "train_loss": 1.3152035799893466e-05,
2754
+ "train_runtime": 3841.6705,
2755
+ "train_samples_per_second": 91.627,
2756
+ "train_steps_per_second": 2.863
2757
  }
2758
  ],
2759
+ "max_steps": 11000,
2760
+ "num_train_epochs": 648,
2761
+ "total_flos": 3.388775639915157e+20,
2762
  "trial_name": null,
2763
  "trial_params": null
2764
  }