plip commited on
Commit
a1d0628
·
1 Parent(s): 18445f5

Training in progress, step 150000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aca41469954b0cdd00c93c46873cd2afb5a1a523ea77c504489bc7efcf8bf668
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ecaa40659da5359d9e6f7d069ca323ca4c45fa18fcb4784f875bec7976d461
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af6be3972f3b3e2c13dce429397ece13973069648d233609e51fd1be73a404a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94d64a0ae504309548136437084f536028b5f9b3d523c385c79db34cb72c3bb
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f52d61c93314d406f5efbb593d2fee6f58fe805cd1be44b93e8b9a7a77fd617
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce01b23c0fe6122788b87f39f9d97726d4354152bd24d5ac8cf11369955b4ef3
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9522f343e5b1c37fe7e600f7b39d619e850350d8948dc240940a440582a0eb9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09454c2cc8aac6faf99c7970cf8bec57b59b683887c6bb291c3a0936d0e7d0d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.261958573126121,
5
- "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2806,11 +2806,211 @@
2806
  "eval_samples_per_second": 1556.477,
2807
  "eval_steps_per_second": 24.785,
2808
  "step": 140000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2809
  }
2810
  ],
2811
  "max_steps": 500000,
2812
  "num_train_epochs": 12,
2813
- "total_flos": 4.4728124518702797e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.4949556140637013,
5
+ "global_step": 150000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2806
  "eval_samples_per_second": 1556.477,
2807
  "eval_steps_per_second": 24.785,
2808
  "step": 140000
2809
+ },
2810
+ {
2811
+ "epoch": 3.27,
2812
+ "learning_rate": 0.0002597105316716766,
2813
+ "loss": 0.2921,
2814
+ "step": 140500
2815
+ },
2816
+ {
2817
+ "epoch": 3.29,
2818
+ "learning_rate": 0.000259378263551047,
2819
+ "loss": 0.2914,
2820
+ "step": 141000
2821
+ },
2822
+ {
2823
+ "epoch": 3.29,
2824
+ "eval_loss": 0.2755984961986542,
2825
+ "eval_runtime": 1.4384,
2826
+ "eval_samples_per_second": 1528.099,
2827
+ "eval_steps_per_second": 24.333,
2828
+ "step": 141000
2829
+ },
2830
+ {
2831
+ "epoch": 3.3,
2832
+ "learning_rate": 0.0002590448539664501,
2833
+ "loss": 0.2915,
2834
+ "step": 141500
2835
+ },
2836
+ {
2837
+ "epoch": 3.31,
2838
+ "learning_rate": 0.00025871030656399966,
2839
+ "loss": 0.2913,
2840
+ "step": 142000
2841
+ },
2842
+ {
2843
+ "epoch": 3.31,
2844
+ "eval_loss": 0.2742982804775238,
2845
+ "eval_runtime": 1.3825,
2846
+ "eval_samples_per_second": 1589.856,
2847
+ "eval_steps_per_second": 25.316,
2848
+ "step": 142000
2849
+ },
2850
+ {
2851
+ "epoch": 3.32,
2852
+ "learning_rate": 0.00025837462500225255,
2853
+ "loss": 0.2912,
2854
+ "step": 142500
2855
+ },
2856
+ {
2857
+ "epoch": 3.33,
2858
+ "learning_rate": 0.0002580378129521685,
2859
+ "loss": 0.2909,
2860
+ "step": 143000
2861
+ },
2862
+ {
2863
+ "epoch": 3.33,
2864
+ "eval_loss": 0.2741672694683075,
2865
+ "eval_runtime": 1.3884,
2866
+ "eval_samples_per_second": 1583.139,
2867
+ "eval_steps_per_second": 25.209,
2868
+ "step": 143000
2869
+ },
2870
+ {
2871
+ "epoch": 3.34,
2872
+ "learning_rate": 0.0002576998740970701,
2873
+ "loss": 0.2907,
2874
+ "step": 143500
2875
+ },
2876
+ {
2877
+ "epoch": 3.36,
2878
+ "learning_rate": 0.00025736081213260253,
2879
+ "loss": 0.2911,
2880
+ "step": 144000
2881
+ },
2882
+ {
2883
+ "epoch": 3.36,
2884
+ "eval_loss": 0.27347105741500854,
2885
+ "eval_runtime": 1.4089,
2886
+ "eval_samples_per_second": 1560.053,
2887
+ "eval_steps_per_second": 24.842,
2888
+ "step": 144000
2889
+ },
2890
+ {
2891
+ "epoch": 3.37,
2892
+ "learning_rate": 0.0002570206307666931,
2893
+ "loss": 0.2905,
2894
+ "step": 144500
2895
+ },
2896
+ {
2897
+ "epoch": 3.38,
2898
+ "learning_rate": 0.0002566793337195108,
2899
+ "loss": 0.2904,
2900
+ "step": 145000
2901
+ },
2902
+ {
2903
+ "epoch": 3.38,
2904
+ "eval_loss": 0.27271768450737,
2905
+ "eval_runtime": 1.4129,
2906
+ "eval_samples_per_second": 1555.627,
2907
+ "eval_steps_per_second": 24.771,
2908
+ "step": 145000
2909
+ },
2910
+ {
2911
+ "epoch": 3.39,
2912
+ "learning_rate": 0.0002563369247234254,
2913
+ "loss": 0.29,
2914
+ "step": 145500
2915
+ },
2916
+ {
2917
+ "epoch": 3.4,
2918
+ "learning_rate": 0.0002559934075229669,
2919
+ "loss": 0.2903,
2920
+ "step": 146000
2921
+ },
2922
+ {
2923
+ "epoch": 3.4,
2924
+ "eval_loss": 0.27478328347206116,
2925
+ "eval_runtime": 1.4292,
2926
+ "eval_samples_per_second": 1537.886,
2927
+ "eval_steps_per_second": 24.489,
2928
+ "step": 146000
2929
+ },
2930
+ {
2931
+ "epoch": 3.41,
2932
+ "learning_rate": 0.0002556487858747843,
2933
+ "loss": 0.2904,
2934
+ "step": 146500
2935
+ },
2936
+ {
2937
+ "epoch": 3.43,
2938
+ "learning_rate": 0.00025530306354760464,
2939
+ "loss": 0.2899,
2940
+ "step": 147000
2941
+ },
2942
+ {
2943
+ "epoch": 3.43,
2944
+ "eval_loss": 0.2697647213935852,
2945
+ "eval_runtime": 1.4585,
2946
+ "eval_samples_per_second": 1506.981,
2947
+ "eval_steps_per_second": 23.997,
2948
+ "step": 147000
2949
+ },
2950
+ {
2951
+ "epoch": 3.44,
2952
+ "learning_rate": 0.000254956244322192,
2953
+ "loss": 0.2899,
2954
+ "step": 147500
2955
+ },
2956
+ {
2957
+ "epoch": 3.45,
2958
+ "learning_rate": 0.00025460833199130595,
2959
+ "loss": 0.2896,
2960
+ "step": 148000
2961
+ },
2962
+ {
2963
+ "epoch": 3.45,
2964
+ "eval_loss": 0.27176716923713684,
2965
+ "eval_runtime": 1.4122,
2966
+ "eval_samples_per_second": 1556.39,
2967
+ "eval_steps_per_second": 24.783,
2968
+ "step": 148000
2969
+ },
2970
+ {
2971
+ "epoch": 3.46,
2972
+ "learning_rate": 0.00025425933035965983,
2973
+ "loss": 0.2895,
2974
+ "step": 148500
2975
+ },
2976
+ {
2977
+ "epoch": 3.47,
2978
+ "learning_rate": 0.00025390924324387965,
2979
+ "loss": 0.2895,
2980
+ "step": 149000
2981
+ },
2982
+ {
2983
+ "epoch": 3.47,
2984
+ "eval_loss": 0.27247366309165955,
2985
+ "eval_runtime": 1.4208,
2986
+ "eval_samples_per_second": 1547.057,
2987
+ "eval_steps_per_second": 24.635,
2988
+ "step": 149000
2989
+ },
2990
+ {
2991
+ "epoch": 3.48,
2992
+ "learning_rate": 0.0002535580744724621,
2993
+ "loss": 0.2892,
2994
+ "step": 149500
2995
+ },
2996
+ {
2997
+ "epoch": 3.49,
2998
+ "learning_rate": 0.00025320582788573246,
2999
+ "loss": 0.2892,
3000
+ "step": 150000
3001
+ },
3002
+ {
3003
+ "epoch": 3.49,
3004
+ "eval_loss": 0.2716062366962433,
3005
+ "eval_runtime": 1.4082,
3006
+ "eval_samples_per_second": 1560.897,
3007
+ "eval_steps_per_second": 24.855,
3008
+ "step": 150000
3009
  }
3010
  ],
3011
  "max_steps": 500000,
3012
  "num_train_epochs": 12,
3013
+ "total_flos": 4.79229959064478e+21,
3014
  "trial_name": null,
3015
  "trial_params": null
3016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af6be3972f3b3e2c13dce429397ece13973069648d233609e51fd1be73a404a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94d64a0ae504309548136437084f536028b5f9b3d523c385c79db34cb72c3bb
3
  size 102501541