lemon-mint committed
Commit 32c3b3a · verified · 1 Parent(s): 4de95e1

Training in progress, step 2136, checkpoint

last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd1eca6c800ef6825e7bf60a3f606c08a3664621930ae86599328032ddfc0a0a
+oid sha256:2ffed5f8c81d4bcc1c3949f7ae2640c2980c4515a7bf471d7277700e4a42dc62
 size 4903351912
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e4678c8c596ac9adbe11b26a3e6cf61fda2a78b8c8aa1d36e117e0b4b9dd9b8
+oid sha256:2776b66179020970a8f8df643b6376fa54d907b9a0ab7de3152df871e8519472
 size 4947570872
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be4ffd13d9c3bc7db9d2f6de30ddc485decf506fd2322157d81e0b581d71538a
+oid sha256:637c9d5ccb29c2571a60e81aba1229d7c57a7860baf764cefb86807c1fd55c51
 size 4962221464
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b620e3f5d53efc08164bcbe9887fa6c2b5101cde21efdb03ca2b3fcf5b3b67ad
+oid sha256:e6e6a180710afa8be7623704d675a1486d890f8acd593108fd30e06f8faf9d99
 size 3670322200
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f485548cbbc71688a7015971c1a5ece38765abc0497df7c8a97a11895a5364a5
+oid sha256:fe0c40a4a813653ed70995efe5d8a8fed35d5d52cf0ca2d406ae22aa69dd62dc
 size 2216
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:459e4f6348b09807a678fef615a08119a6cb3d845b8e137dc15cc9629d38a1d8
+oid sha256:638959202e00ec8e922c9fefb3271344d643c48007a3ce5c5efbd2a02e4157e6
 size 1000
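
The checkpoint files above are tracked with Git LFS, so each diff only touches the three-line pointer file (`version`, `oid sha256:`, `size`) while the multi-GB payloads are stored out of band. A minimal sketch, assuming a locally downloaded shard and the standard pointer layout shown in the diffs, of checking that a file matches its pointer:

```python
import hashlib
from pathlib import Path


def read_lfs_pointer(pointer_text: str) -> dict:
    """Parse the key/value lines of a Git LFS pointer file."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def verify_shard(pointer_text: str, shard_path: Path) -> bool:
    """Check a downloaded shard against the oid and size recorded in its pointer."""
    fields = read_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    size = 0
    with shard_path.open("rb") as f:
        # Stream in 1 MiB chunks so a multi-GB shard is never held in memory at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size


# Pointer contents for the updated model-00001-of-00004.safetensors shard above:
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:2ffed5f8c81d4bcc1c3949f7ae2640c2980c4515a7bf471d7277700e4a42dc62\n"
    "size 4903351912\n"
)
# verify_shard(pointer, Path("last-checkpoint/model-00001-of-00004.safetensors"))
```

Streaming the hash keeps memory flat even for the ~4.9 GB shards listed above; the same check applies to the optimizer and scheduler pointers.
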
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9662921348314608,
+  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 2100,
+  "global_step": 2136,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14707,6 +14707,258 @@
       "learning_rate": 3.106191336121222e-08,
       "loss": 1.1671,
       "step": 2100
+    },
+    {
+      "epoch": 1.9672284644194757,
+      "grad_norm": 0.0,
+      "learning_rate": 2.936063551628454e-08,
+      "loss": 1.1406,
+      "step": 2101
+    },
+    {
+      "epoch": 1.9681647940074907,
+      "grad_norm": 0.0,
+      "learning_rate": 2.7707234737970325e-08,
+      "loss": 1.155,
+      "step": 2102
+    },
+    {
+      "epoch": 1.9691011235955056,
+      "grad_norm": 0.0,
+      "learning_rate": 2.6101714990083292e-08,
+      "loss": 1.1179,
+      "step": 2103
+    },
+    {
+      "epoch": 1.9700374531835205,
+      "grad_norm": 0.0,
+      "learning_rate": 2.4544080121657877e-08,
+      "loss": 1.1077,
+      "step": 2104
+    },
+    {
+      "epoch": 1.9709737827715355,
+      "grad_norm": 0.0,
+      "learning_rate": 2.3034333866922555e-08,
+      "loss": 1.1038,
+      "step": 2105
+    },
+    {
+      "epoch": 1.9719101123595506,
+      "grad_norm": 0.0,
+      "learning_rate": 2.1572479845299865e-08,
+      "loss": 1.1178,
+      "step": 2106
+    },
+    {
+      "epoch": 1.9728464419475655,
+      "grad_norm": 0.0,
+      "learning_rate": 2.0158521561404188e-08,
+      "loss": 1.1605,
+      "step": 2107
+    },
+    {
+      "epoch": 1.9737827715355807,
+      "grad_norm": 0.0,
+      "learning_rate": 1.879246240501509e-08,
+      "loss": 1.1151,
+      "step": 2108
+    },
+    {
+      "epoch": 1.9747191011235956,
+      "grad_norm": 0.0,
+      "learning_rate": 1.747430565108843e-08,
+      "loss": 1.1017,
+      "step": 2109
+    },
+    {
+      "epoch": 1.9756554307116105,
+      "grad_norm": 0.0,
+      "learning_rate": 1.6204054459736385e-08,
+      "loss": 1.1408,
+      "step": 2110
+    },
+    {
+      "epoch": 1.9765917602996255,
+      "grad_norm": 0.0,
+      "learning_rate": 1.4981711876227435e-08,
+      "loss": 1.1019,
+      "step": 2111
+    },
+    {
+      "epoch": 1.9775280898876404,
+      "grad_norm": 0.0,
+      "learning_rate": 1.3807280830968606e-08,
+      "loss": 1.1559,
+      "step": 2112
+    },
+    {
+      "epoch": 1.9784644194756553,
+      "grad_norm": 0.0,
+      "learning_rate": 1.2680764139509915e-08,
+      "loss": 1.1168,
+      "step": 2113
+    },
+    {
+      "epoch": 1.9794007490636703,
+      "grad_norm": 0.0,
+      "learning_rate": 1.1602164502531043e-08,
+      "loss": 1.1662,
+      "step": 2114
+    },
+    {
+      "epoch": 1.9803370786516854,
+      "grad_norm": 0.0,
+      "learning_rate": 1.0571484505839114e-08,
+      "loss": 1.1558,
+      "step": 2115
+    },
+    {
+      "epoch": 1.9812734082397003,
+      "grad_norm": 0.0,
+      "learning_rate": 9.588726620357591e-09,
+      "loss": 1.1725,
+      "step": 2116
+    },
+    {
+      "epoch": 1.9822097378277155,
+      "grad_norm": 0.0,
+      "learning_rate": 8.653893202124064e-09,
+      "loss": 1.1049,
+      "step": 2117
+    },
+    {
+      "epoch": 1.9831460674157304,
+      "grad_norm": 0.0,
+      "learning_rate": 7.76698649228136e-09,
+      "loss": 1.1635,
+      "step": 2118
+    },
+    {
+      "epoch": 1.9840823970037453,
+      "grad_norm": 0.0,
+      "learning_rate": 6.928008617077542e-09,
+      "loss": 1.1132,
+      "step": 2119
+    },
+    {
+      "epoch": 1.9850187265917603,
+      "grad_norm": 0.0,
+      "learning_rate": 6.136961587852597e-09,
+      "loss": 1.1769,
+      "step": 2120
+    },
+    {
+      "epoch": 1.9859550561797752,
+      "grad_norm": 0.0,
+      "learning_rate": 5.393847301042865e-09,
+      "loss": 1.1298,
+      "step": 2121
+    },
+    {
+      "epoch": 1.9868913857677901,
+      "grad_norm": 0.0,
+      "learning_rate": 4.698667538169943e-09,
+      "loss": 1.1547,
+      "step": 2122
+    },
+    {
+      "epoch": 1.9878277153558053,
+      "grad_norm": 0.0,
+      "learning_rate": 4.051423965838464e-09,
+      "loss": 1.1608,
+      "step": 2123
+    },
+    {
+      "epoch": 1.9887640449438202,
+      "grad_norm": 0.0,
+      "learning_rate": 3.4521181357316523e-09,
+      "loss": 1.1669,
+      "step": 2124
+    },
+    {
+      "epoch": 1.9897003745318353,
+      "grad_norm": 0.0,
+      "learning_rate": 2.9007514846113304e-09,
+      "loss": 1.167,
+      "step": 2125
+    },
+    {
+      "epoch": 1.9906367041198503,
+      "grad_norm": 0.0,
+      "learning_rate": 2.397325334309031e-09,
+      "loss": 1.1729,
+      "step": 2126
+    },
+    {
+      "epoch": 1.9915730337078652,
+      "grad_norm": 0.0,
+      "learning_rate": 1.941840891721558e-09,
+      "loss": 1.1634,
+      "step": 2127
+    },
+    {
+      "epoch": 1.9925093632958801,
+      "grad_norm": 0.0,
+      "learning_rate": 1.53429924881765e-09,
+      "loss": 1.1068,
+      "step": 2128
+    },
+    {
+      "epoch": 1.993445692883895,
+      "grad_norm": 0.0,
+      "learning_rate": 1.174701382626875e-09,
+      "loss": 1.1238,
+      "step": 2129
+    },
+    {
+      "epoch": 1.99438202247191,
+      "grad_norm": 0.0,
+      "learning_rate": 8.63048155235191e-10,
+      "loss": 1.113,
+      "step": 2130
+    },
+    {
+      "epoch": 1.9953183520599251,
+      "grad_norm": 0.0,
+      "learning_rate": 5.99340313798269e-10,
+      "loss": 1.1078,
+      "step": 2131
+    },
+    {
+      "epoch": 1.99625468164794,
+      "grad_norm": 0.0,
+      "learning_rate": 3.8357849051484655e-10,
+      "loss": 1.102,
+      "step": 2132
+    },
+    {
+      "epoch": 1.9971910112359552,
+      "grad_norm": 0.0,
+      "learning_rate": 2.1576320265337403e-10,
+      "loss": 1.0998,
+      "step": 2133
+    },
+    {
+      "epoch": 1.9981273408239701,
+      "grad_norm": 0.0,
+      "learning_rate": 9.58948525253689e-11,
+      "loss": 1.1516,
+      "step": 2134
+    },
+    {
+      "epoch": 1.999063670411985,
+      "grad_norm": 0.0,
+      "learning_rate": 2.3973727498738387e-11,
+      "loss": 1.1345,
+      "step": 2135
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0,
+      "loss": 1.2175,
+      "step": 2136
     }
   ],
   "logging_steps": 1,
@@ -14721,12 +14973,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.871693999535555e+18,
+  "total_flos": 6.987483782320226e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null