plip commited on
Commit
18445f5
1 Parent(s): fcb1dc9

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d58d76c1f639c31e7ba95d3fafba4b0b4df7581368e50e5257ae58a95439515c
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca41469954b0cdd00c93c46873cd2afb5a1a523ea77c504489bc7efcf8bf668
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e03d62fb4f6a3907627a7fbc89817395c675b306818e9086c488f63d3ab7cd
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af6be3972f3b3e2c13dce429397ece13973069648d233609e51fd1be73a404a
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8be656d703ff88efddcc4ceee60baffceb98bce707f489ada7dcfd797db0102
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1c3de3c2d66025eca4b56b8d5efacb781ba1e9b4daae40acd9c92c0f7cbe98c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9522f343e5b1c37fe7e600f7b39d619e850350d8948dc240940a440582a0eb9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.028961532188541,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
2606
  "eval_samples_per_second": 1555.976,
2607
  "eval_steps_per_second": 24.777,
2608
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "max_steps": 500000,
2612
  "num_train_epochs": 12,
2613
- "total_flos": 4.15332531309578e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.261958573126121,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 1555.976,
2607
  "eval_steps_per_second": 24.777,
2608
  "step": 130000
2609
+ },
2610
+ {
2611
+ "epoch": 3.04,
2612
+ "learning_rate": 0.0002661106834948409,
2613
+ "loss": 0.2939,
2614
+ "step": 130500
2615
+ },
2616
+ {
2617
+ "epoch": 3.05,
2618
+ "learning_rate": 0.0002658019908268041,
2619
+ "loss": 0.2937,
2620
+ "step": 131000
2621
+ },
2622
+ {
2623
+ "epoch": 3.05,
2624
+ "eval_loss": 0.27508604526519775,
2625
+ "eval_runtime": 1.4205,
2626
+ "eval_samples_per_second": 1547.302,
2627
+ "eval_steps_per_second": 24.639,
2628
+ "step": 131000
2629
+ },
2630
+ {
2631
+ "epoch": 3.06,
2632
+ "learning_rate": 0.00026549208644594766,
2633
+ "loss": 0.294,
2634
+ "step": 131500
2635
+ },
2636
+ {
2637
+ "epoch": 3.08,
2638
+ "learning_rate": 0.00026518097374133627,
2639
+ "loss": 0.2935,
2640
+ "step": 132000
2641
+ },
2642
+ {
2643
+ "epoch": 3.08,
2644
+ "eval_loss": 0.2736782431602478,
2645
+ "eval_runtime": 1.4305,
2646
+ "eval_samples_per_second": 1536.563,
2647
+ "eval_steps_per_second": 24.468,
2648
+ "step": 132000
2649
+ },
2650
+ {
2651
+ "epoch": 3.09,
2652
+ "learning_rate": 0.00026486865611524853,
2653
+ "loss": 0.2932,
2654
+ "step": 132500
2655
+ },
2656
+ {
2657
+ "epoch": 3.1,
2658
+ "learning_rate": 0.00026455513698314003,
2659
+ "loss": 0.2931,
2660
+ "step": 133000
2661
+ },
2662
+ {
2663
+ "epoch": 3.1,
2664
+ "eval_loss": 0.27545085549354553,
2665
+ "eval_runtime": 1.4136,
2666
+ "eval_samples_per_second": 1554.95,
2667
+ "eval_steps_per_second": 24.76,
2668
+ "step": 133000
2669
+ },
2670
+ {
2671
+ "epoch": 3.11,
2672
+ "learning_rate": 0.0002642404197736058,
2673
+ "loss": 0.2931,
2674
+ "step": 133500
2675
+ },
2676
+ {
2677
+ "epoch": 3.12,
2678
+ "learning_rate": 0.0002639245079283428,
2679
+ "loss": 0.2936,
2680
+ "step": 134000
2681
+ },
2682
+ {
2683
+ "epoch": 3.12,
2684
+ "eval_loss": 0.27676478028297424,
2685
+ "eval_runtime": 1.4314,
2686
+ "eval_samples_per_second": 1535.602,
2687
+ "eval_steps_per_second": 24.452,
2688
+ "step": 134000
2689
+ },
2690
+ {
2691
+ "epoch": 3.13,
2692
+ "learning_rate": 0.00026360740490211234,
2693
+ "loss": 0.2932,
2694
+ "step": 134500
2695
+ },
2696
+ {
2697
+ "epoch": 3.15,
2698
+ "learning_rate": 0.0002632891141627023,
2699
+ "loss": 0.2934,
2700
+ "step": 135000
2701
+ },
2702
+ {
2703
+ "epoch": 3.15,
2704
+ "eval_loss": 0.2775753438472748,
2705
+ "eval_runtime": 1.4316,
2706
+ "eval_samples_per_second": 1535.332,
2707
+ "eval_steps_per_second": 24.448,
2708
+ "step": 135000
2709
+ },
2710
+ {
2711
+ "epoch": 3.16,
2712
+ "learning_rate": 0.00026296963919088923,
2713
+ "loss": 0.2927,
2714
+ "step": 135500
2715
+ },
2716
+ {
2717
+ "epoch": 3.17,
2718
+ "learning_rate": 0.00026264898348040024,
2719
+ "loss": 0.2925,
2720
+ "step": 136000
2721
+ },
2722
+ {
2723
+ "epoch": 3.17,
2724
+ "eval_loss": 0.27160218358039856,
2725
+ "eval_runtime": 1.4163,
2726
+ "eval_samples_per_second": 1551.918,
2727
+ "eval_steps_per_second": 24.712,
2728
+ "step": 136000
2729
+ },
2730
+ {
2731
+ "epoch": 3.18,
2732
+ "learning_rate": 0.0002623271505378748,
2733
+ "loss": 0.2926,
2734
+ "step": 136500
2735
+ },
2736
+ {
2737
+ "epoch": 3.19,
2738
+ "learning_rate": 0.00026200414388282637,
2739
+ "loss": 0.2924,
2740
+ "step": 137000
2741
+ },
2742
+ {
2743
+ "epoch": 3.19,
2744
+ "eval_loss": 0.2737308442592621,
2745
+ "eval_runtime": 1.4266,
2746
+ "eval_samples_per_second": 1540.705,
2747
+ "eval_steps_per_second": 24.534,
2748
+ "step": 137000
2749
+ },
2750
+ {
2751
+ "epoch": 3.2,
2752
+ "learning_rate": 0.00026167996704760406,
2753
+ "loss": 0.2929,
2754
+ "step": 137500
2755
+ },
2756
+ {
2757
+ "epoch": 3.22,
2758
+ "learning_rate": 0.00026135462357735375,
2759
+ "loss": 0.2931,
2760
+ "step": 138000
2761
+ },
2762
+ {
2763
+ "epoch": 3.22,
2764
+ "eval_loss": 0.2752697467803955,
2765
+ "eval_runtime": 1.4495,
2766
+ "eval_samples_per_second": 1516.388,
2767
+ "eval_steps_per_second": 24.146,
2768
+ "step": 138000
2769
+ },
2770
+ {
2771
+ "epoch": 3.23,
2772
+ "learning_rate": 0.0002610281170299795,
2773
+ "loss": 0.292,
2774
+ "step": 138500
2775
+ },
2776
+ {
2777
+ "epoch": 3.24,
2778
+ "learning_rate": 0.00026070045097610465,
2779
+ "loss": 0.2917,
2780
+ "step": 139000
2781
+ },
2782
+ {
2783
+ "epoch": 3.24,
2784
+ "eval_loss": 0.2744641602039337,
2785
+ "eval_runtime": 1.4101,
2786
+ "eval_samples_per_second": 1558.726,
2787
+ "eval_steps_per_second": 24.82,
2788
+ "step": 139000
2789
+ },
2790
+ {
2791
+ "epoch": 3.25,
2792
+ "learning_rate": 0.0002603716289990326,
2793
+ "loss": 0.2923,
2794
+ "step": 139500
2795
+ },
2796
+ {
2797
+ "epoch": 3.26,
2798
+ "learning_rate": 0.00026004165469470787,
2799
+ "loss": 0.2918,
2800
+ "step": 140000
2801
+ },
2802
+ {
2803
+ "epoch": 3.26,
2804
+ "eval_loss": 0.2773716449737549,
2805
+ "eval_runtime": 1.4122,
2806
+ "eval_samples_per_second": 1556.477,
2807
+ "eval_steps_per_second": 24.785,
2808
+ "step": 140000
2809
  }
2810
  ],
2811
  "max_steps": 500000,
2812
  "num_train_epochs": 12,
2813
+ "total_flos": 4.4728124518702797e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e03d62fb4f6a3907627a7fbc89817395c675b306818e9086c488f63d3ab7cd
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af6be3972f3b3e2c13dce429397ece13973069648d233609e51fd1be73a404a
3
  size 102501541