mtzig committed (verified)
Commit 21e6b11 · 1 Parent(s): 8bfdc48

Training in progress, step 6777, checkpoint

last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:965566f8b9a741a6f2801dc78e4fbc5ac70240c8d6d7b5570ba0182bcd9674e9
+ oid sha256:7e2e9047c1b951991a3cb533b5422cf65b7fdd99fce52ba9dad6cd543430b657
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7212ed89906b0804da8eba1f5c500d042a2a31b594b63c7afc77b7fca62b4f05
+ oid sha256:65562a7ad6cabc3a9a834233cb6b2a6418ba0f2a4995c24942bae3fcdd716740
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:93ad7abb665289229475a0dc55018b7ca6c10b70ef45f15c0b9b8f137cc5c291
+ oid sha256:d1095a5622f7ae7057612f380cf391d514c18a62ac77c825675a6caf6ee67c65
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a28d9e945552a66feca51fc9780b294ee621de58c9db83d3aefe7462105d0d49
+ oid sha256:0991fb538d2fcdd7133b88b57d7408634c5853d2bea2f8795c0d2a2c04be8c2a
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:49977e9fb46265ba81ad5ce120a7b938b5fafa454d7bb632a57a63f975e9f54a
+ oid sha256:68623acd4c5451515db193d2aa04b8145b1f1f36417e52c36086f855dd8b168d
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3a633db66552fb787cb1151b9a3e2e30b0293e84603ef7d545351fc947c5f219
+ oid sha256:e32076e376b49ea017c893743edd08119946ffa77fa889406c367ad701334a0e
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f2168060d5d243a5dda1e0bc7482749ed6c7fc4cb39ff029c8a95d29643dcf6
+ oid sha256:be31033ff0655091be7d5cf4fb0b2133466588c00cd4b8a9f31a6082a8afcee4
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:581e13951882957811a470d66e41e45bbc9bb66544ca2d6e3568683cc9866887
+ oid sha256:6ef8509d1cfc7f1a4958331fec1ee3edaa3e6165fe80cc849e478a187e2339e3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:091b7cd663405f9e474cf640b71ae20df31b45b8cceb2d74232e5c4232ae67f5
+ oid sha256:712216dc073e1dfa28326996a976c217850297a16301f8f5ccfa922b1a5dc3ee
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:951da6c961efeea8abd4771cf6f335146152fd6e811aedd9376cfbaf0b5c2661
+ oid sha256:9028ecc7ecc1acc6c8525827a2279969075801179a1f74a70512f85eeab8816a
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48de50c66a37a5de2f7b1873acf38375f58754d859c5eb82d7fe707070cddd0c
+ oid sha256:f69e552de9eb6a6f61b8231a8ce384288d7ac46d1fed91935b9e5cbb21762087
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2136ce865bd31b51bdee33783218e662d324835501f13ef2cf89d65f472e3f07
+ oid sha256:633aeca10962285051f888adc2c2c409b0bf3999362ccda037a788110e685916
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f3d454f05c0bda87b3125802c8738baab69763f7e63757668c9f80a78618863
+ oid sha256:2a317dad97e27324d1b8604ef54c8e6a4cccd5fd5efea166b6eda97fc4edd76b
  size 1064
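Each of the .distcp and .pth entries above is a Git LFS pointer file (version, oid, size), not the binary payload itself; only the oid changes between checkpoints while the recorded size stays fixed. As a minimal sketch, assuming a local clone where the real blob has been fetched (for example via git lfs pull), the following hypothetical Python snippet parses a pointer of the form shown above and checks a downloaded file against its oid and size; the paths and the verify helper are illustrative, not part of this repo.

import hashlib

def parse_pointer(text: str) -> dict:
    # Parse a git-lfs pointer of the form shown above:
    #   version https://git-lfs.github.com/spec/v1
    #   oid sha256:<hex digest>
    #   size <bytes>
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].split(":", 1)[1], "size": int(fields["size"])}

def verify(blob_path: str, pointer_text: str) -> bool:
    # Compare a local blob's SHA-256 digest and byte length to the pointer.
    ptr = parse_pointer(pointer_text)
    with open(blob_path, "rb") as f:
        data = f.read()
    return hashlib.sha256(data).hexdigest() == ptr["oid"] and len(data) == ptr["size"]

# Illustrative usage with the new scheduler.pt pointer from this commit:
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:2a317dad97e27324d1b8604ef54c8e6a4cccd5fd5efea166b6eda97fc4edd76b
size 1064
"""
# print(verify("last-checkpoint/scheduler.pt", pointer))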
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9885651051272594,
+ "epoch": 0.999926226484692,
  "eval_steps": 20,
- "global_step": 6700,
+ "global_step": 6777,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -50939,6 +50939,581 @@
  "eval_samples_per_second": 5.902,
  "eval_steps_per_second": 0.203,
  "step": 6700
50942
+ },
50943
+ {
50944
+ "epoch": 0.9887126521578753,
50945
+ "grad_norm": 0.30346840620040894,
50946
+ "learning_rate": 7.661683413868748e-09,
50947
+ "loss": 0.0029,
50948
+ "step": 6701
50949
+ },
50950
+ {
50951
+ "epoch": 0.9888601991884913,
50952
+ "grad_norm": 3.611311674118042,
50953
+ "learning_rate": 7.461411546916264e-09,
50954
+ "loss": 0.0608,
50955
+ "step": 6702
50956
+ },
50957
+ {
50958
+ "epoch": 0.9890077462191074,
50959
+ "grad_norm": 3.273043632507324,
50960
+ "learning_rate": 7.263790976492769e-09,
50961
+ "loss": 0.0737,
50962
+ "step": 6703
50963
+ },
50964
+ {
50965
+ "epoch": 0.9891552932497234,
50966
+ "grad_norm": 1.5908385515213013,
50967
+ "learning_rate": 7.068821755030763e-09,
50968
+ "loss": 0.0437,
50969
+ "step": 6704
50970
+ },
50971
+ {
50972
+ "epoch": 0.9893028402803393,
50973
+ "grad_norm": 2.5678610801696777,
50974
+ "learning_rate": 6.876503934262202e-09,
50975
+ "loss": 0.1046,
50976
+ "step": 6705
50977
+ },
50978
+ {
50979
+ "epoch": 0.9894503873109554,
50980
+ "grad_norm": 1.9680798053741455,
50981
+ "learning_rate": 6.686837565215154e-09,
50982
+ "loss": 0.0491,
50983
+ "step": 6706
50984
+ },
50985
+ {
50986
+ "epoch": 0.9895979343415714,
50987
+ "grad_norm": 3.603398084640503,
50988
+ "learning_rate": 6.499822698210478e-09,
50989
+ "loss": 0.0613,
50990
+ "step": 6707
50991
+ },
50992
+ {
50993
+ "epoch": 0.9897454813721874,
50994
+ "grad_norm": 2.4477665424346924,
50995
+ "learning_rate": 6.315459382871813e-09,
50996
+ "loss": 0.0667,
50997
+ "step": 6708
50998
+ },
50999
+ {
51000
+ "epoch": 0.9898930284028034,
51001
+ "grad_norm": 1.95452082157135,
51002
+ "learning_rate": 6.1337476681122556e-09,
51003
+ "loss": 0.0635,
51004
+ "step": 6709
51005
+ },
51006
+ {
51007
+ "epoch": 0.9900405754334194,
51008
+ "grad_norm": 0.929094672203064,
51009
+ "learning_rate": 5.95468760214879e-09,
51010
+ "loss": 0.0181,
51011
+ "step": 6710
51012
+ },
51013
+ {
51014
+ "epoch": 0.9901881224640354,
51015
+ "grad_norm": 1.7649720907211304,
51016
+ "learning_rate": 5.77827923248786e-09,
51017
+ "loss": 0.0337,
51018
+ "step": 6711
51019
+ },
51020
+ {
51021
+ "epoch": 0.9903356694946515,
51022
+ "grad_norm": 2.4020333290100098,
51023
+ "learning_rate": 5.604522605937579e-09,
51024
+ "loss": 0.0745,
51025
+ "step": 6712
51026
+ },
51027
+ {
51028
+ "epoch": 0.9904832165252674,
51029
+ "grad_norm": 1.8048148155212402,
51030
+ "learning_rate": 5.433417768598847e-09,
51031
+ "loss": 0.0588,
51032
+ "step": 6713
51033
+ },
51034
+ {
51035
+ "epoch": 0.9906307635558834,
51036
+ "grad_norm": 1.9580947160720825,
51037
+ "learning_rate": 5.264964765870906e-09,
51038
+ "loss": 0.0477,
51039
+ "step": 6714
51040
+ },
51041
+ {
51042
+ "epoch": 0.9907783105864995,
51043
+ "grad_norm": 1.246323823928833,
51044
+ "learning_rate": 5.099163642449112e-09,
51045
+ "loss": 0.0336,
51046
+ "step": 6715
51047
+ },
51048
+ {
51049
+ "epoch": 0.9909258576171155,
51050
+ "grad_norm": 1.8688416481018066,
51051
+ "learning_rate": 4.9360144423260535e-09,
51052
+ "loss": 0.0259,
51053
+ "step": 6716
51054
+ },
51055
+ {
51056
+ "epoch": 0.9910734046477314,
51057
+ "grad_norm": 1.5381361246109009,
51058
+ "learning_rate": 4.775517208788217e-09,
51059
+ "loss": 0.0298,
51060
+ "step": 6717
51061
+ },
51062
+ {
51063
+ "epoch": 0.9912209516783475,
51064
+ "grad_norm": 1.709076166152954,
51065
+ "learning_rate": 4.617671984420425e-09,
51066
+ "loss": 0.0542,
51067
+ "step": 6718
51068
+ },
51069
+ {
51070
+ "epoch": 0.9913684987089635,
51071
+ "grad_norm": 1.3890794515609741,
51072
+ "learning_rate": 4.462478811104731e-09,
51073
+ "loss": 0.0558,
51074
+ "step": 6719
51075
+ },
51076
+ {
51077
+ "epoch": 0.9915160457395795,
51078
+ "grad_norm": 0.8841899633407593,
51079
+ "learning_rate": 4.309937730015978e-09,
51080
+ "loss": 0.0172,
51081
+ "step": 6720
51082
+ },
51083
+ {
51084
+ "epoch": 0.9915160457395795,
51085
+ "eval_accuracy": 0.9782923299565847,
51086
+ "eval_f1": 0.9629629629629629,
51087
+ "eval_loss": 0.055379100143909454,
51088
+ "eval_precision": 0.9798994974874372,
51089
+ "eval_recall": 0.9466019417475728,
51090
+ "eval_runtime": 50.3029,
51091
+ "eval_samples_per_second": 5.785,
51092
+ "eval_steps_per_second": 0.199,
51093
+ "step": 6720
51094
+ },
51095
+ {
51096
+ "epoch": 0.9916635927701954,
51097
+ "grad_norm": 1.6927621364593506,
51098
+ "learning_rate": 4.160048781628456e-09,
51099
+ "loss": 0.0152,
51100
+ "step": 6721
51101
+ },
51102
+ {
51103
+ "epoch": 0.9918111398008115,
51104
+ "grad_norm": 0.853327214717865,
51105
+ "learning_rate": 4.012812005712574e-09,
51106
+ "loss": 0.0098,
51107
+ "step": 6722
51108
+ },
51109
+ {
51110
+ "epoch": 0.9919586868314275,
51111
+ "grad_norm": 3.5236921310424805,
51112
+ "learning_rate": 3.8682274413337495e-09,
51113
+ "loss": 0.0527,
51114
+ "step": 6723
51115
+ },
51116
+ {
51117
+ "epoch": 0.9921062338620436,
51118
+ "grad_norm": 2.9586477279663086,
51119
+ "learning_rate": 3.726295126853519e-09,
51120
+ "loss": 0.0441,
51121
+ "step": 6724
51122
+ },
51123
+ {
51124
+ "epoch": 0.9922537808926596,
51125
+ "grad_norm": 0.3246397376060486,
51126
+ "learning_rate": 3.5870150999317567e-09,
51127
+ "loss": 0.0029,
51128
+ "step": 6725
51129
+ },
51130
+ {
51131
+ "epoch": 0.9924013279232755,
51132
+ "grad_norm": 1.3314762115478516,
51133
+ "learning_rate": 3.450387397522237e-09,
51134
+ "loss": 0.025,
51135
+ "step": 6726
51136
+ },
51137
+ {
51138
+ "epoch": 0.9925488749538915,
51139
+ "grad_norm": 1.7466126680374146,
51140
+ "learning_rate": 3.3164120558759614e-09,
51141
+ "loss": 0.0336,
51142
+ "step": 6727
51143
+ },
51144
+ {
51145
+ "epoch": 0.9926964219845076,
51146
+ "grad_norm": 0.8817312121391296,
51147
+ "learning_rate": 3.185089110542272e-09,
51148
+ "loss": 0.0208,
51149
+ "step": 6728
51150
+ },
51151
+ {
51152
+ "epoch": 0.9928439690151236,
51153
+ "grad_norm": 1.9117987155914307,
51154
+ "learning_rate": 3.056418596362187e-09,
51155
+ "loss": 0.0431,
51156
+ "step": 6729
51157
+ },
51158
+ {
51159
+ "epoch": 0.9929915160457395,
51160
+ "grad_norm": 5.536546230316162,
51161
+ "learning_rate": 2.9304005474761755e-09,
51162
+ "loss": 0.1609,
51163
+ "step": 6730
51164
+ },
51165
+ {
51166
+ "epoch": 0.9931390630763556,
51167
+ "grad_norm": 4.430239677429199,
51168
+ "learning_rate": 2.8070349973219336e-09,
51169
+ "loss": 0.1037,
51170
+ "step": 6731
51171
+ },
51172
+ {
51173
+ "epoch": 0.9932866101069716,
51174
+ "grad_norm": 1.1493936777114868,
51175
+ "learning_rate": 2.6863219786299466e-09,
51176
+ "loss": 0.0348,
51177
+ "step": 6732
51178
+ },
51179
+ {
51180
+ "epoch": 0.9934341571375876,
51181
+ "grad_norm": 1.1461900472640991,
51182
+ "learning_rate": 2.568261523430149e-09,
51183
+ "loss": 0.0286,
51184
+ "step": 6733
51185
+ },
51186
+ {
51187
+ "epoch": 0.9935817041682036,
51188
+ "grad_norm": 1.5313574075698853,
51189
+ "learning_rate": 2.452853663046373e-09,
51190
+ "loss": 0.0368,
51191
+ "step": 6734
51192
+ },
51193
+ {
51194
+ "epoch": 0.9937292511988196,
51195
+ "grad_norm": 1.981681227684021,
51196
+ "learning_rate": 2.3400984280985695e-09,
51197
+ "loss": 0.0701,
51198
+ "step": 6735
51199
+ },
51200
+ {
51201
+ "epoch": 0.9938767982294356,
51202
+ "grad_norm": 2.576758623123169,
51203
+ "learning_rate": 2.229995848506139e-09,
51204
+ "loss": 0.0525,
51205
+ "step": 6736
51206
+ },
51207
+ {
51208
+ "epoch": 0.9940243452600517,
51209
+ "grad_norm": 2.1853508949279785,
51210
+ "learning_rate": 2.1225459534801596e-09,
51211
+ "loss": 0.0543,
51212
+ "step": 6737
51213
+ },
51214
+ {
51215
+ "epoch": 0.9941718922906676,
51216
+ "grad_norm": 3.452421188354492,
51217
+ "learning_rate": 2.017748771531158e-09,
51218
+ "loss": 0.0628,
51219
+ "step": 6738
51220
+ },
51221
+ {
51222
+ "epoch": 0.9943194393212836,
51223
+ "grad_norm": 1.7515523433685303,
51224
+ "learning_rate": 1.915604330464671e-09,
51225
+ "loss": 0.0685,
51226
+ "step": 6739
51227
+ },
51228
+ {
51229
+ "epoch": 0.9944669863518997,
51230
+ "grad_norm": 2.203991413116455,
51231
+ "learning_rate": 1.8161126573823517e-09,
51232
+ "loss": 0.0444,
51233
+ "step": 6740
51234
+ },
51235
+ {
51236
+ "epoch": 0.9944669863518997,
51237
+ "eval_accuracy": 0.9782923299565847,
51238
+ "eval_f1": 0.9629629629629629,
51239
+ "eval_loss": 0.055171407759189606,
51240
+ "eval_precision": 0.9798994974874372,
51241
+ "eval_recall": 0.9466019417475728,
51242
+ "eval_runtime": 48.6957,
51243
+ "eval_samples_per_second": 5.976,
51244
+ "eval_steps_per_second": 0.205,
51245
+ "step": 6740
51246
+ },
51247
+ {
51248
+ "epoch": 0.9946145333825157,
51249
+ "grad_norm": 2.4897584915161133,
51250
+ "learning_rate": 1.7192737786819736e-09,
51251
+ "loss": 0.0722,
51252
+ "step": 6741
51253
+ },
51254
+ {
51255
+ "epoch": 0.9947620804131316,
51256
+ "grad_norm": 1.9944440126419067,
51257
+ "learning_rate": 1.6250877200574278e-09,
51258
+ "loss": 0.0995,
51259
+ "step": 6742
51260
+ },
51261
+ {
51262
+ "epoch": 0.9949096274437477,
51263
+ "grad_norm": 2.042222738265991,
51264
+ "learning_rate": 1.5335545064998346e-09,
51265
+ "loss": 0.0642,
51266
+ "step": 6743
51267
+ },
51268
+ {
51269
+ "epoch": 0.9950571744743637,
51270
+ "grad_norm": 1.745491623878479,
51271
+ "learning_rate": 1.4446741622942128e-09,
51272
+ "loss": 0.0506,
51273
+ "step": 6744
51274
+ },
51275
+ {
51276
+ "epoch": 0.9952047215049797,
51277
+ "grad_norm": 0.9310182929039001,
51278
+ "learning_rate": 1.3584467110228095e-09,
51279
+ "loss": 0.0122,
51280
+ "step": 6745
51281
+ },
51282
+ {
51283
+ "epoch": 0.9953522685355958,
51284
+ "grad_norm": 1.8398277759552002,
51285
+ "learning_rate": 1.2748721755651005e-09,
51286
+ "loss": 0.0261,
51287
+ "step": 6746
51288
+ },
51289
+ {
51290
+ "epoch": 0.9954998155662117,
51291
+ "grad_norm": 1.677306890487671,
51292
+ "learning_rate": 1.1939505780966809e-09,
51293
+ "loss": 0.0358,
51294
+ "step": 6747
51295
+ },
51296
+ {
51297
+ "epoch": 0.9956473625968277,
51298
+ "grad_norm": 1.2638682126998901,
51299
+ "learning_rate": 1.115681940085933e-09,
51300
+ "loss": 0.0283,
51301
+ "step": 6748
51302
+ },
51303
+ {
51304
+ "epoch": 0.9957949096274438,
51305
+ "grad_norm": 3.8664028644561768,
51306
+ "learning_rate": 1.040066282300689e-09,
51307
+ "loss": 0.0514,
51308
+ "step": 6749
51309
+ },
51310
+ {
51311
+ "epoch": 0.9959424566580598,
51312
+ "grad_norm": 1.0908302068710327,
51313
+ "learning_rate": 9.671036248048993e-10,
51314
+ "loss": 0.0171,
51315
+ "step": 6750
51316
+ },
51317
+ {
51318
+ "epoch": 0.9960900036886757,
51319
+ "grad_norm": 4.517603874206543,
51320
+ "learning_rate": 8.967939869553022e-10,
51321
+ "loss": 0.0963,
51322
+ "step": 6751
51323
+ },
51324
+ {
51325
+ "epoch": 0.9962375507192918,
51326
+ "grad_norm": 9.87070369720459,
51327
+ "learning_rate": 8.291373874091957e-10,
51328
+ "loss": 0.0511,
51329
+ "step": 6752
51330
+ },
51331
+ {
51332
+ "epoch": 0.9963850977499078,
51333
+ "grad_norm": 1.7819442749023438,
51334
+ "learning_rate": 7.641338441166657e-10,
51335
+ "loss": 0.0142,
51336
+ "step": 6753
51337
+ },
51338
+ {
51339
+ "epoch": 0.9965326447805238,
51340
+ "grad_norm": 1.9164355993270874,
51341
+ "learning_rate": 7.017833743261371e-10,
51342
+ "loss": 0.0333,
51343
+ "step": 6754
51344
+ },
51345
+ {
51346
+ "epoch": 0.9966801918111398,
51347
+ "grad_norm": 1.178529143333435,
51348
+ "learning_rate": 6.420859945788227e-10,
51349
+ "loss": 0.0175,
51350
+ "step": 6755
51351
+ },
51352
+ {
51353
+ "epoch": 0.9968277388417558,
51354
+ "grad_norm": 3.305959701538086,
51355
+ "learning_rate": 5.850417207153847e-10,
51356
+ "loss": 0.0576,
51357
+ "step": 6756
51358
+ },
51359
+ {
51360
+ "epoch": 0.9969752858723718,
51361
+ "grad_norm": 3.1544477939605713,
51362
+ "learning_rate": 5.306505678714934e-10,
51363
+ "loss": 0.0754,
51364
+ "step": 6757
51365
+ },
51366
+ {
51367
+ "epoch": 0.9971228329029879,
51368
+ "grad_norm": 1.3226299285888672,
51369
+ "learning_rate": 4.789125504778281e-10,
51370
+ "loss": 0.0261,
51371
+ "step": 6758
51372
+ },
51373
+ {
51374
+ "epoch": 0.9972703799336038,
51375
+ "grad_norm": 3.1813669204711914,
51376
+ "learning_rate": 4.2982768226229597e-10,
51377
+ "loss": 0.1108,
51378
+ "step": 6759
51379
+ },
51380
+ {
51381
+ "epoch": 0.9974179269642198,
51382
+ "grad_norm": 2.4142541885375977,
51383
+ "learning_rate": 3.8339597624892366e-10,
51384
+ "loss": 0.0481,
51385
+ "step": 6760
51386
+ },
51387
+ {
51388
+ "epoch": 0.9974179269642198,
51389
+ "eval_accuracy": 0.9782923299565847,
51390
+ "eval_f1": 0.9629629629629629,
51391
+ "eval_loss": 0.05494352802634239,
51392
+ "eval_precision": 0.9798994974874372,
51393
+ "eval_recall": 0.9466019417475728,
51394
+ "eval_runtime": 48.9599,
51395
+ "eval_samples_per_second": 5.944,
51396
+ "eval_steps_per_second": 0.204,
51397
+ "step": 6760
51398
+ },
51399
+ {
51400
+ "epoch": 0.9975654739948359,
51401
+ "grad_norm": 1.254651665687561,
51402
+ "learning_rate": 3.396174447556355e-10,
51403
+ "loss": 0.0174,
51404
+ "step": 6761
51405
+ },
51406
+ {
51407
+ "epoch": 0.9977130210254519,
51408
+ "grad_norm": 2.606041669845581,
51409
+ "learning_rate": 2.984920994009155e-10,
51410
+ "loss": 0.0746,
51411
+ "step": 6762
51412
+ },
51413
+ {
51414
+ "epoch": 0.9978605680560678,
51415
+ "grad_norm": 6.167627334594727,
51416
+ "learning_rate": 2.600199510938151e-10,
51417
+ "loss": 0.1954,
51418
+ "step": 6763
51419
+ },
51420
+ {
51421
+ "epoch": 0.9980081150866839,
51422
+ "grad_norm": 2.2177047729492188,
51423
+ "learning_rate": 2.2420101004394512e-10,
51424
+ "loss": 0.0676,
51425
+ "step": 6764
51426
+ },
51427
+ {
51428
+ "epoch": 0.9981556621172999,
51429
+ "grad_norm": 1.7990946769714355,
51430
+ "learning_rate": 1.9103528575370457e-10,
51431
+ "loss": 0.0404,
51432
+ "step": 6765
51433
+ },
51434
+ {
51435
+ "epoch": 0.9983032091479159,
51436
+ "grad_norm": 1.5802024602890015,
51437
+ "learning_rate": 1.6052278702272107e-10,
51438
+ "loss": 0.0352,
51439
+ "step": 6766
51440
+ },
51441
+ {
51442
+ "epoch": 0.998450756178532,
51443
+ "grad_norm": 1.7018808126449585,
51444
+ "learning_rate": 1.3266352194785114e-10,
51445
+ "loss": 0.0254,
51446
+ "step": 6767
51447
+ },
51448
+ {
51449
+ "epoch": 0.9985983032091479,
51450
+ "grad_norm": 2.102145195007324,
51451
+ "learning_rate": 1.0745749792095971e-10,
51452
+ "loss": 0.0427,
51453
+ "step": 6768
51454
+ },
51455
+ {
51456
+ "epoch": 0.9987458502397639,
51457
+ "grad_norm": 2.0960288047790527,
51458
+ "learning_rate": 8.490472163003027e-11,
51459
+ "loss": 0.0562,
51460
+ "step": 6769
51461
+ },
51462
+ {
51463
+ "epoch": 0.99889339727038,
51464
+ "grad_norm": 2.1226682662963867,
51465
+ "learning_rate": 6.500519905694446e-11,
51466
+ "loss": 0.0399,
51467
+ "step": 6770
51468
+ },
51469
+ {
51470
+ "epoch": 0.999040944300996,
51471
+ "grad_norm": 2.805297374725342,
51472
+ "learning_rate": 4.775893548414345e-11,
51473
+ "loss": 0.128,
51474
+ "step": 6771
51475
+ },
51476
+ {
51477
+ "epoch": 0.9991884913316119,
51478
+ "grad_norm": 3.2651734352111816,
51479
+ "learning_rate": 3.316593548574609e-11,
51480
+ "loss": 0.0469,
51481
+ "step": 6772
51482
+ },
51483
+ {
51484
+ "epoch": 0.999336038362228,
51485
+ "grad_norm": 1.4640666246414185,
51486
+ "learning_rate": 2.1226202935320518e-11,
51487
+ "loss": 0.033,
51488
+ "step": 6773
51489
+ },
51490
+ {
51491
+ "epoch": 0.999483585392844,
51492
+ "grad_norm": 1.624456524848938,
51493
+ "learning_rate": 1.1939740999222793e-11,
51494
+ "loss": 0.0241,
51495
+ "step": 6774
51496
+ },
51497
+ {
51498
+ "epoch": 0.99963113242346,
51499
+ "grad_norm": 1.3273454904556274,
51500
+ "learning_rate": 5.3065521421480356e-12,
51501
+ "loss": 0.034,
51502
+ "step": 6775
51503
+ },
51504
+ {
51505
+ "epoch": 0.999778679454076,
51506
+ "grad_norm": 0.9207583069801331,
51507
+ "learning_rate": 1.3266381237997393e-12,
51508
+ "loss": 0.026,
51509
+ "step": 6776
51510
+ },
51511
+ {
51512
+ "epoch": 0.999926226484692,
51513
+ "grad_norm": 1.3743399381637573,
51514
+ "learning_rate": 0.0,
51515
+ "loss": 0.0357,
51516
+ "step": 6777
51517
  }
51518
  ],
51519
  "logging_steps": 1,
 
@@ -50953,12 +51528,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 2.0639708098351596e+18,
+ "total_flos": 2.0879468077357466e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null