mtzig commited on
Commit
13d988c
·
verified ·
1 Parent(s): 5ec3d1b

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca9ad4c45f6ed9ff141594db885ce7f7936b72c33bf7831ea51061b751c035d0
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab77ddbb637a9bc0d36ef5dbbbaa0af341f0bf80b31d6e285eda485f1898eb8f
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0005a7a0bf83524bb14a93793c3fdab26d6c2653ecf1f287deeb08e8e78ca1fd
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286a3c21980a7a404bde1675a6009d842c88e5d90ac29813f962481a4368f26f
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ed9ba22411531d762cf848c2d9daddff6ee7f29ca806d5aef7f5ba9813947f0
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f72128a18889f676efa0e2e54c424b47d5e4132cc601aa9074f6f5411b8f94
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:371c2c2e2799dc1b7de0b141a212b857758839245977f9c58714ec11a1162c7e
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041f67a3ce2b2d81077a75116ed983ec1c3d6e3f3611853c338c7ecf44913d9e
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c2bdb37a902663861f07a52281ef1995bcfda4e8830c535faae292fabb659b6
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851162e189682dac7fc53a1c0d10ceb5145d8569c1a94696d7c715b4c49a67ea
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:478afb018e67b6963a405f6bfecae60632c4d7b580db98fb4a37e4698026d54a
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d32e1e73d5d548c4bd50868ff94314b76b56f22bf14438e5afcb5d47b865b7
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f9589c38a3685a3a7913c666aa2459077a853b4e8f8a5230bce75fa99b9825
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9738f30136c7a74ad2e25b79cf200868a8a6622ff78163abd5aa23402612abf6
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6419cd9723247df1fda540548fd8769dc6b91a2aa84ee458e9c056ee561c4042
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a3d3761905544d982ee155e6770c63fdcd8e1d6ad804c9e3fc0b48ef3c557a
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5d4b484a25f92c99275105a25a5abc87d9965b9b7b7ca782045935178f7d615
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6294caa602c6fe4743ca0c7205bb0551de153ef41f54789786a229cd626bef4
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c549c91cf2b0439baf2468c247f8e2109889f720a27e0d09c9b7d5f695e49a5
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:305f7da916867733708b5e00527298ca628c3162916331e86427a0e6c1d84c36
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81f7b75ebe10eb5c6ecc97c93cde36ee0b594c67c95103dbdcabab169117e465
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91731ae666b3fff1615f6c83fbbbe5160c401bb673770f4a96920e7df7c75154
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9ada658c72a7f1c0eace40e44824bfa74094a719f3408a314ecbea87cf54304
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30c1d4c35a1c4cecfd92a1a7ea971a84bf462bf7dead3baefbde0a5e7b2317cc
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c8c47081e0cd48c8e1647d14b0cfbcdb2a632b234bf18bf1a619d30eef11321
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1735f1ec303b05af874a7e52da143869d5f926b0a117b607b330e92e0e8872be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8058781701825077,
5
  "eval_steps": 20,
6
- "global_step": 1700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12939,6 +12939,766 @@
12939
  "eval_samples_per_second": 5.502,
12940
  "eval_steps_per_second": 0.183,
12941
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12942
  }
12943
  ],
12944
  "logging_steps": 1,
@@ -12958,7 +13718,7 @@
12958
  "attributes": {}
12959
  }
12960
  },
12961
- "total_flos": 4.5441970776047616e+17,
12962
  "train_batch_size": 8,
12963
  "trial_name": null,
12964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8532827684285376,
5
  "eval_steps": 20,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12939
  "eval_samples_per_second": 5.502,
12940
  "eval_steps_per_second": 0.183,
12941
  "step": 1700
12942
+ },
12943
+ {
12944
+ "epoch": 0.806352216164968,
12945
+ "grad_norm": 5.284399509429932,
12946
+ "learning_rate": 2.1949715425360173e-06,
12947
+ "loss": 0.2302,
12948
+ "step": 1701
12949
+ },
12950
+ {
12951
+ "epoch": 0.8068262621474283,
12952
+ "grad_norm": 3.2276976108551025,
12953
+ "learning_rate": 2.184634643477831e-06,
12954
+ "loss": 0.1153,
12955
+ "step": 1702
12956
+ },
12957
+ {
12958
+ "epoch": 0.8073003081298886,
12959
+ "grad_norm": 3.992670774459839,
12960
+ "learning_rate": 2.174319156384186e-06,
12961
+ "loss": 0.1346,
12962
+ "step": 1703
12963
+ },
12964
+ {
12965
+ "epoch": 0.8077743541123489,
12966
+ "grad_norm": 3.4810454845428467,
12967
+ "learning_rate": 2.164025109516692e-06,
12968
+ "loss": 0.095,
12969
+ "step": 1704
12970
+ },
12971
+ {
12972
+ "epoch": 0.8082484000948092,
12973
+ "grad_norm": 6.212849140167236,
12974
+ "learning_rate": 2.15375253107823e-06,
12975
+ "loss": 0.2299,
12976
+ "step": 1705
12977
+ },
12978
+ {
12979
+ "epoch": 0.8087224460772695,
12980
+ "grad_norm": 6.363603591918945,
12981
+ "learning_rate": 2.1435014492128547e-06,
12982
+ "loss": 0.2338,
12983
+ "step": 1706
12984
+ },
12985
+ {
12986
+ "epoch": 0.8091964920597298,
12987
+ "grad_norm": 2.451110363006592,
12988
+ "learning_rate": 2.1332718920057307e-06,
12989
+ "loss": 0.084,
12990
+ "step": 1707
12991
+ },
12992
+ {
12993
+ "epoch": 0.8096705380421901,
12994
+ "grad_norm": 6.087503910064697,
12995
+ "learning_rate": 2.1230638874830413e-06,
12996
+ "loss": 0.1361,
12997
+ "step": 1708
12998
+ },
12999
+ {
13000
+ "epoch": 0.8101445840246504,
13001
+ "grad_norm": 6.973792552947998,
13002
+ "learning_rate": 2.1128774636119307e-06,
13003
+ "loss": 0.2558,
13004
+ "step": 1709
13005
+ },
13006
+ {
13007
+ "epoch": 0.8106186300071107,
13008
+ "grad_norm": 6.504344463348389,
13009
+ "learning_rate": 2.102712648300418e-06,
13010
+ "loss": 0.1156,
13011
+ "step": 1710
13012
+ },
13013
+ {
13014
+ "epoch": 0.811092675989571,
13015
+ "grad_norm": 5.404758930206299,
13016
+ "learning_rate": 2.0925694693973162e-06,
13017
+ "loss": 0.1702,
13018
+ "step": 1711
13019
+ },
13020
+ {
13021
+ "epoch": 0.8115667219720313,
13022
+ "grad_norm": 5.191570281982422,
13023
+ "learning_rate": 2.082447954692164e-06,
13024
+ "loss": 0.1003,
13025
+ "step": 1712
13026
+ },
13027
+ {
13028
+ "epoch": 0.8120407679544915,
13029
+ "grad_norm": 3.821763515472412,
13030
+ "learning_rate": 2.0723481319151427e-06,
13031
+ "loss": 0.1208,
13032
+ "step": 1713
13033
+ },
13034
+ {
13035
+ "epoch": 0.8125148139369519,
13036
+ "grad_norm": 6.716168403625488,
13037
+ "learning_rate": 2.062270028737008e-06,
13038
+ "loss": 0.2031,
13039
+ "step": 1714
13040
+ },
13041
+ {
13042
+ "epoch": 0.8129888599194122,
13043
+ "grad_norm": 2.98030948638916,
13044
+ "learning_rate": 2.052213672769007e-06,
13045
+ "loss": 0.101,
13046
+ "step": 1715
13047
+ },
13048
+ {
13049
+ "epoch": 0.8134629059018725,
13050
+ "grad_norm": 5.622329235076904,
13051
+ "learning_rate": 2.042179091562805e-06,
13052
+ "loss": 0.1626,
13053
+ "step": 1716
13054
+ },
13055
+ {
13056
+ "epoch": 0.8139369518843328,
13057
+ "grad_norm": 3.3454723358154297,
13058
+ "learning_rate": 2.032166312610411e-06,
13059
+ "loss": 0.0978,
13060
+ "step": 1717
13061
+ },
13062
+ {
13063
+ "epoch": 0.814410997866793,
13064
+ "grad_norm": 5.193914413452148,
13065
+ "learning_rate": 2.0221753633441033e-06,
13066
+ "loss": 0.1742,
13067
+ "step": 1718
13068
+ },
13069
+ {
13070
+ "epoch": 0.8148850438492534,
13071
+ "grad_norm": 6.165769577026367,
13072
+ "learning_rate": 2.012206271136353e-06,
13073
+ "loss": 0.1491,
13074
+ "step": 1719
13075
+ },
13076
+ {
13077
+ "epoch": 0.8153590898317137,
13078
+ "grad_norm": 6.2735795974731445,
13079
+ "learning_rate": 2.002259063299744e-06,
13080
+ "loss": 0.1477,
13081
+ "step": 1720
13082
+ },
13083
+ {
13084
+ "epoch": 0.8153590898317137,
13085
+ "eval_accuracy": 0.9951690821256038,
13086
+ "eval_f1": 0.9444444444444444,
13087
+ "eval_loss": 0.0112903518602252,
13088
+ "eval_precision": 0.9107142857142857,
13089
+ "eval_recall": 0.9807692307692307,
13090
+ "eval_runtime": 49.2837,
13091
+ "eval_samples_per_second": 5.499,
13092
+ "eval_steps_per_second": 0.183,
13093
+ "step": 1720
13094
+ },
13095
+ {
13096
+ "epoch": 0.8158331358141739,
13097
+ "grad_norm": 8.393010139465332,
13098
+ "learning_rate": 1.992333767086905e-06,
13099
+ "loss": 0.2223,
13100
+ "step": 1721
13101
+ },
13102
+ {
13103
+ "epoch": 0.8163071817966343,
13104
+ "grad_norm": 8.414145469665527,
13105
+ "learning_rate": 1.982430409690439e-06,
13106
+ "loss": 0.1873,
13107
+ "step": 1722
13108
+ },
13109
+ {
13110
+ "epoch": 0.8167812277790946,
13111
+ "grad_norm": 3.9387011528015137,
13112
+ "learning_rate": 1.972549018242836e-06,
13113
+ "loss": 0.1164,
13114
+ "step": 1723
13115
+ },
13116
+ {
13117
+ "epoch": 0.8172552737615548,
13118
+ "grad_norm": 6.245006084442139,
13119
+ "learning_rate": 1.9626896198164093e-06,
13120
+ "loss": 0.1791,
13121
+ "step": 1724
13122
+ },
13123
+ {
13124
+ "epoch": 0.8177293197440152,
13125
+ "grad_norm": 3.7231218814849854,
13126
+ "learning_rate": 1.9528522414232122e-06,
13127
+ "loss": 0.1483,
13128
+ "step": 1725
13129
+ },
13130
+ {
13131
+ "epoch": 0.8182033657264755,
13132
+ "grad_norm": 7.006000995635986,
13133
+ "learning_rate": 1.9430369100149727e-06,
13134
+ "loss": 0.2054,
13135
+ "step": 1726
13136
+ },
13137
+ {
13138
+ "epoch": 0.8186774117089358,
13139
+ "grad_norm": 4.269167900085449,
13140
+ "learning_rate": 1.9332436524830167e-06,
13141
+ "loss": 0.1074,
13142
+ "step": 1727
13143
+ },
13144
+ {
13145
+ "epoch": 0.8191514576913961,
13146
+ "grad_norm": 4.43737268447876,
13147
+ "learning_rate": 1.9234724956581918e-06,
13148
+ "loss": 0.1194,
13149
+ "step": 1728
13150
+ },
13151
+ {
13152
+ "epoch": 0.8196255036738563,
13153
+ "grad_norm": 5.734049320220947,
13154
+ "learning_rate": 1.9137234663107995e-06,
13155
+ "loss": 0.2281,
13156
+ "step": 1729
13157
+ },
13158
+ {
13159
+ "epoch": 0.8200995496563167,
13160
+ "grad_norm": 4.329126358032227,
13161
+ "learning_rate": 1.9039965911505098e-06,
13162
+ "loss": 0.1464,
13163
+ "step": 1730
13164
+ },
13165
+ {
13166
+ "epoch": 0.820573595638777,
13167
+ "grad_norm": 6.123368263244629,
13168
+ "learning_rate": 1.8942918968263036e-06,
13169
+ "loss": 0.1876,
13170
+ "step": 1731
13171
+ },
13172
+ {
13173
+ "epoch": 0.8210476416212372,
13174
+ "grad_norm": 5.27827262878418,
13175
+ "learning_rate": 1.8846094099263911e-06,
13176
+ "loss": 0.1182,
13177
+ "step": 1732
13178
+ },
13179
+ {
13180
+ "epoch": 0.8215216876036976,
13181
+ "grad_norm": 3.905064344406128,
13182
+ "learning_rate": 1.8749491569781397e-06,
13183
+ "loss": 0.1297,
13184
+ "step": 1733
13185
+ },
13186
+ {
13187
+ "epoch": 0.8219957335861579,
13188
+ "grad_norm": 4.914556980133057,
13189
+ "learning_rate": 1.8653111644480004e-06,
13190
+ "loss": 0.1504,
13191
+ "step": 1734
13192
+ },
13193
+ {
13194
+ "epoch": 0.8224697795686181,
13195
+ "grad_norm": 2.135080337524414,
13196
+ "learning_rate": 1.8556954587414377e-06,
13197
+ "loss": 0.0756,
13198
+ "step": 1735
13199
+ },
13200
+ {
13201
+ "epoch": 0.8229438255510785,
13202
+ "grad_norm": 5.558071136474609,
13203
+ "learning_rate": 1.8461020662028583e-06,
13204
+ "loss": 0.1586,
13205
+ "step": 1736
13206
+ },
13207
+ {
13208
+ "epoch": 0.8234178715335387,
13209
+ "grad_norm": 4.518209934234619,
13210
+ "learning_rate": 1.8365310131155345e-06,
13211
+ "loss": 0.2052,
13212
+ "step": 1737
13213
+ },
13214
+ {
13215
+ "epoch": 0.8238919175159991,
13216
+ "grad_norm": 5.095973014831543,
13217
+ "learning_rate": 1.8269823257015351e-06,
13218
+ "loss": 0.1441,
13219
+ "step": 1738
13220
+ },
13221
+ {
13222
+ "epoch": 0.8243659634984594,
13223
+ "grad_norm": 5.229091644287109,
13224
+ "learning_rate": 1.8174560301216527e-06,
13225
+ "loss": 0.1543,
13226
+ "step": 1739
13227
+ },
13228
+ {
13229
+ "epoch": 0.8248400094809196,
13230
+ "grad_norm": 5.269925594329834,
13231
+ "learning_rate": 1.807952152475333e-06,
13232
+ "loss": 0.1781,
13233
+ "step": 1740
13234
+ },
13235
+ {
13236
+ "epoch": 0.8248400094809196,
13237
+ "eval_accuracy": 0.9935587761674718,
13238
+ "eval_f1": 0.9272727272727272,
13239
+ "eval_loss": 0.01386988628655672,
13240
+ "eval_precision": 0.8793103448275862,
13241
+ "eval_recall": 0.9807692307692307,
13242
+ "eval_runtime": 50.0148,
13243
+ "eval_samples_per_second": 5.418,
13244
+ "eval_steps_per_second": 0.18,
13245
+ "step": 1740
13246
+ },
13247
+ {
13248
+ "epoch": 0.82531405546338,
13249
+ "grad_norm": 3.926471710205078,
13250
+ "learning_rate": 1.7984707188006034e-06,
13251
+ "loss": 0.1284,
13252
+ "step": 1741
13253
+ },
13254
+ {
13255
+ "epoch": 0.8257881014458403,
13256
+ "grad_norm": 3.689272403717041,
13257
+ "learning_rate": 1.7890117550739995e-06,
13258
+ "loss": 0.1592,
13259
+ "step": 1742
13260
+ },
13261
+ {
13262
+ "epoch": 0.8262621474283005,
13263
+ "grad_norm": 8.198687553405762,
13264
+ "learning_rate": 1.7795752872104965e-06,
13265
+ "loss": 0.2166,
13266
+ "step": 1743
13267
+ },
13268
+ {
13269
+ "epoch": 0.8267361934107609,
13270
+ "grad_norm": 5.243244171142578,
13271
+ "learning_rate": 1.7701613410634367e-06,
13272
+ "loss": 0.238,
13273
+ "step": 1744
13274
+ },
13275
+ {
13276
+ "epoch": 0.8272102393932211,
13277
+ "grad_norm": 7.627920150756836,
13278
+ "learning_rate": 1.7607699424244583e-06,
13279
+ "loss": 0.1358,
13280
+ "step": 1745
13281
+ },
13282
+ {
13283
+ "epoch": 0.8276842853756814,
13284
+ "grad_norm": 4.7214884757995605,
13285
+ "learning_rate": 1.7514011170234258e-06,
13286
+ "loss": 0.1165,
13287
+ "step": 1746
13288
+ },
13289
+ {
13290
+ "epoch": 0.8281583313581418,
13291
+ "grad_norm": 4.949544906616211,
13292
+ "learning_rate": 1.7420548905283619e-06,
13293
+ "loss": 0.1227,
13294
+ "step": 1747
13295
+ },
13296
+ {
13297
+ "epoch": 0.828632377340602,
13298
+ "grad_norm": 3.5602853298187256,
13299
+ "learning_rate": 1.7327312885453695e-06,
13300
+ "loss": 0.1247,
13301
+ "step": 1748
13302
+ },
13303
+ {
13304
+ "epoch": 0.8291064233230624,
13305
+ "grad_norm": 4.533194065093994,
13306
+ "learning_rate": 1.7234303366185712e-06,
13307
+ "loss": 0.183,
13308
+ "step": 1749
13309
+ },
13310
+ {
13311
+ "epoch": 0.8295804693055226,
13312
+ "grad_norm": 4.326444625854492,
13313
+ "learning_rate": 1.7141520602300332e-06,
13314
+ "loss": 0.1427,
13315
+ "step": 1750
13316
+ },
13317
+ {
13318
+ "epoch": 0.8300545152879829,
13319
+ "grad_norm": 2.9050464630126953,
13320
+ "learning_rate": 1.7048964847996928e-06,
13321
+ "loss": 0.1105,
13322
+ "step": 1751
13323
+ },
13324
+ {
13325
+ "epoch": 0.8305285612704433,
13326
+ "grad_norm": 4.191965579986572,
13327
+ "learning_rate": 1.6956636356852984e-06,
13328
+ "loss": 0.1444,
13329
+ "step": 1752
13330
+ },
13331
+ {
13332
+ "epoch": 0.8310026072529035,
13333
+ "grad_norm": 4.245302677154541,
13334
+ "learning_rate": 1.6864535381823333e-06,
13335
+ "loss": 0.1695,
13336
+ "step": 1753
13337
+ },
13338
+ {
13339
+ "epoch": 0.8314766532353638,
13340
+ "grad_norm": 4.924167156219482,
13341
+ "learning_rate": 1.6772662175239451e-06,
13342
+ "loss": 0.1323,
13343
+ "step": 1754
13344
+ },
13345
+ {
13346
+ "epoch": 0.8319506992178242,
13347
+ "grad_norm": 3.5585765838623047,
13348
+ "learning_rate": 1.668101698880883e-06,
13349
+ "loss": 0.1285,
13350
+ "step": 1755
13351
+ },
13352
+ {
13353
+ "epoch": 0.8324247452002844,
13354
+ "grad_norm": 5.416965484619141,
13355
+ "learning_rate": 1.6589600073614175e-06,
13356
+ "loss": 0.1603,
13357
+ "step": 1756
13358
+ },
13359
+ {
13360
+ "epoch": 0.8328987911827447,
13361
+ "grad_norm": 4.387139320373535,
13362
+ "learning_rate": 1.6498411680112925e-06,
13363
+ "loss": 0.1554,
13364
+ "step": 1757
13365
+ },
13366
+ {
13367
+ "epoch": 0.833372837165205,
13368
+ "grad_norm": 3.8568196296691895,
13369
+ "learning_rate": 1.6407452058136298e-06,
13370
+ "loss": 0.1279,
13371
+ "step": 1758
13372
+ },
13373
+ {
13374
+ "epoch": 0.8338468831476653,
13375
+ "grad_norm": 5.32737922668457,
13376
+ "learning_rate": 1.6316721456888807e-06,
13377
+ "loss": 0.209,
13378
+ "step": 1759
13379
+ },
13380
+ {
13381
+ "epoch": 0.8343209291301257,
13382
+ "grad_norm": 6.815487861633301,
13383
+ "learning_rate": 1.6226220124947513e-06,
13384
+ "loss": 0.1517,
13385
+ "step": 1760
13386
+ },
13387
+ {
13388
+ "epoch": 0.8343209291301257,
13389
+ "eval_accuracy": 0.9935587761674718,
13390
+ "eval_f1": 0.9272727272727272,
13391
+ "eval_loss": 0.012907618656754494,
13392
+ "eval_precision": 0.8793103448275862,
13393
+ "eval_recall": 0.9807692307692307,
13394
+ "eval_runtime": 50.0121,
13395
+ "eval_samples_per_second": 5.419,
13396
+ "eval_steps_per_second": 0.18,
13397
+ "step": 1760
13398
+ },
13399
+ {
13400
+ "epoch": 0.8347949751125859,
13401
+ "grad_norm": 3.5248119831085205,
13402
+ "learning_rate": 1.6135948310261272e-06,
13403
+ "loss": 0.1413,
13404
+ "step": 1761
13405
+ },
13406
+ {
13407
+ "epoch": 0.8352690210950462,
13408
+ "grad_norm": 3.0675511360168457,
13409
+ "learning_rate": 1.6045906260150212e-06,
13410
+ "loss": 0.1353,
13411
+ "step": 1762
13412
+ },
13413
+ {
13414
+ "epoch": 0.8357430670775066,
13415
+ "grad_norm": 7.004110336303711,
13416
+ "learning_rate": 1.595609422130494e-06,
13417
+ "loss": 0.1473,
13418
+ "step": 1763
13419
+ },
13420
+ {
13421
+ "epoch": 0.8362171130599668,
13422
+ "grad_norm": 2.630929470062256,
13423
+ "learning_rate": 1.5866512439785876e-06,
13424
+ "loss": 0.0991,
13425
+ "step": 1764
13426
+ },
13427
+ {
13428
+ "epoch": 0.8366911590424271,
13429
+ "grad_norm": 3.722667694091797,
13430
+ "learning_rate": 1.5777161161022614e-06,
13431
+ "loss": 0.1403,
13432
+ "step": 1765
13433
+ },
13434
+ {
13435
+ "epoch": 0.8371652050248874,
13436
+ "grad_norm": 3.6338841915130615,
13437
+ "learning_rate": 1.5688040629813229e-06,
13438
+ "loss": 0.0963,
13439
+ "step": 1766
13440
+ },
13441
+ {
13442
+ "epoch": 0.8376392510073477,
13443
+ "grad_norm": 2.220780611038208,
13444
+ "learning_rate": 1.5599151090323627e-06,
13445
+ "loss": 0.094,
13446
+ "step": 1767
13447
+ },
13448
+ {
13449
+ "epoch": 0.838113296989808,
13450
+ "grad_norm": 5.549960136413574,
13451
+ "learning_rate": 1.5510492786086828e-06,
13452
+ "loss": 0.2037,
13453
+ "step": 1768
13454
+ },
13455
+ {
13456
+ "epoch": 0.8385873429722683,
13457
+ "grad_norm": 3.976283073425293,
13458
+ "learning_rate": 1.5422065960002364e-06,
13459
+ "loss": 0.1453,
13460
+ "step": 1769
13461
+ },
13462
+ {
13463
+ "epoch": 0.8390613889547286,
13464
+ "grad_norm": 5.508865833282471,
13465
+ "learning_rate": 1.5333870854335554e-06,
13466
+ "loss": 0.2064,
13467
+ "step": 1770
13468
+ },
13469
+ {
13470
+ "epoch": 0.839535434937189,
13471
+ "grad_norm": 3.408942222595215,
13472
+ "learning_rate": 1.5245907710716912e-06,
13473
+ "loss": 0.1245,
13474
+ "step": 1771
13475
+ },
13476
+ {
13477
+ "epoch": 0.8400094809196492,
13478
+ "grad_norm": 7.579738616943359,
13479
+ "learning_rate": 1.5158176770141342e-06,
13480
+ "loss": 0.1888,
13481
+ "step": 1772
13482
+ },
13483
+ {
13484
+ "epoch": 0.8404835269021095,
13485
+ "grad_norm": 6.760648727416992,
13486
+ "learning_rate": 1.5070678272967654e-06,
13487
+ "loss": 0.2148,
13488
+ "step": 1773
13489
+ },
13490
+ {
13491
+ "epoch": 0.8409575728845698,
13492
+ "grad_norm": 3.159531593322754,
13493
+ "learning_rate": 1.4983412458917846e-06,
13494
+ "loss": 0.1209,
13495
+ "step": 1774
13496
+ },
13497
+ {
13498
+ "epoch": 0.8414316188670301,
13499
+ "grad_norm": 4.622367858886719,
13500
+ "learning_rate": 1.4896379567076369e-06,
13501
+ "loss": 0.1917,
13502
+ "step": 1775
13503
+ },
13504
+ {
13505
+ "epoch": 0.8419056648494904,
13506
+ "grad_norm": 3.152876377105713,
13507
+ "learning_rate": 1.4809579835889564e-06,
13508
+ "loss": 0.0609,
13509
+ "step": 1776
13510
+ },
13511
+ {
13512
+ "epoch": 0.8423797108319507,
13513
+ "grad_norm": 3.5214357376098633,
13514
+ "learning_rate": 1.472301350316495e-06,
13515
+ "loss": 0.1228,
13516
+ "step": 1777
13517
+ },
13518
+ {
13519
+ "epoch": 0.842853756814411,
13520
+ "grad_norm": 2.849489450454712,
13521
+ "learning_rate": 1.4636680806070625e-06,
13522
+ "loss": 0.0938,
13523
+ "step": 1778
13524
+ },
13525
+ {
13526
+ "epoch": 0.8433278027968713,
13527
+ "grad_norm": 5.918670654296875,
13528
+ "learning_rate": 1.4550581981134571e-06,
13529
+ "loss": 0.185,
13530
+ "step": 1779
13531
+ },
13532
+ {
13533
+ "epoch": 0.8438018487793316,
13534
+ "grad_norm": 4.758391857147217,
13535
+ "learning_rate": 1.4464717264244043e-06,
13536
+ "loss": 0.1811,
13537
+ "step": 1780
13538
+ },
13539
+ {
13540
+ "epoch": 0.8438018487793316,
13541
+ "eval_accuracy": 0.9943639291465378,
13542
+ "eval_f1": 0.9357798165137615,
13543
+ "eval_loss": 0.012266670353710651,
13544
+ "eval_precision": 0.8947368421052632,
13545
+ "eval_recall": 0.9807692307692307,
13546
+ "eval_runtime": 50.3857,
13547
+ "eval_samples_per_second": 5.379,
13548
+ "eval_steps_per_second": 0.179,
13549
+ "step": 1780
13550
+ },
13551
+ {
13552
+ "epoch": 0.8442758947617919,
13553
+ "grad_norm": 3.0533530712127686,
13554
+ "learning_rate": 1.43790868906449e-06,
13555
+ "loss": 0.0867,
13556
+ "step": 1781
13557
+ },
13558
+ {
13559
+ "epoch": 0.8447499407442522,
13560
+ "grad_norm": 3.962001085281372,
13561
+ "learning_rate": 1.429369109494091e-06,
13562
+ "loss": 0.182,
13563
+ "step": 1782
13564
+ },
13565
+ {
13566
+ "epoch": 0.8452239867267125,
13567
+ "grad_norm": 4.5312180519104,
13568
+ "learning_rate": 1.4208530111093244e-06,
13569
+ "loss": 0.1194,
13570
+ "step": 1783
13571
+ },
13572
+ {
13573
+ "epoch": 0.8456980327091728,
13574
+ "grad_norm": 6.248788356781006,
13575
+ "learning_rate": 1.4123604172419714e-06,
13576
+ "loss": 0.1418,
13577
+ "step": 1784
13578
+ },
13579
+ {
13580
+ "epoch": 0.8461720786916331,
13581
+ "grad_norm": 5.026639938354492,
13582
+ "learning_rate": 1.4038913511594166e-06,
13583
+ "loss": 0.1182,
13584
+ "step": 1785
13585
+ },
13586
+ {
13587
+ "epoch": 0.8466461246740934,
13588
+ "grad_norm": 3.611996650695801,
13589
+ "learning_rate": 1.395445836064586e-06,
13590
+ "loss": 0.1078,
13591
+ "step": 1786
13592
+ },
13593
+ {
13594
+ "epoch": 0.8471201706565537,
13595
+ "grad_norm": 6.5539984703063965,
13596
+ "learning_rate": 1.3870238950958837e-06,
13597
+ "loss": 0.15,
13598
+ "step": 1787
13599
+ },
13600
+ {
13601
+ "epoch": 0.847594216639014,
13602
+ "grad_norm": 5.68766450881958,
13603
+ "learning_rate": 1.378625551327124e-06,
13604
+ "loss": 0.1344,
13605
+ "step": 1788
13606
+ },
13607
+ {
13608
+ "epoch": 0.8480682626214743,
13609
+ "grad_norm": 5.3630828857421875,
13610
+ "learning_rate": 1.3702508277674731e-06,
13611
+ "loss": 0.2046,
13612
+ "step": 1789
13613
+ },
13614
+ {
13615
+ "epoch": 0.8485423086039345,
13616
+ "grad_norm": 3.6046223640441895,
13617
+ "learning_rate": 1.3618997473613837e-06,
13618
+ "loss": 0.1093,
13619
+ "step": 1790
13620
+ },
13621
+ {
13622
+ "epoch": 0.8490163545863949,
13623
+ "grad_norm": 5.983584403991699,
13624
+ "learning_rate": 1.353572332988534e-06,
13625
+ "loss": 0.1991,
13626
+ "step": 1791
13627
+ },
13628
+ {
13629
+ "epoch": 0.8494904005688552,
13630
+ "grad_norm": 10.93433952331543,
13631
+ "learning_rate": 1.3452686074637632e-06,
13632
+ "loss": 0.1925,
13633
+ "step": 1792
13634
+ },
13635
+ {
13636
+ "epoch": 0.8499644465513154,
13637
+ "grad_norm": 5.214844226837158,
13638
+ "learning_rate": 1.3369885935370086e-06,
13639
+ "loss": 0.1227,
13640
+ "step": 1793
13641
+ },
13642
+ {
13643
+ "epoch": 0.8504384925337758,
13644
+ "grad_norm": 4.261415481567383,
13645
+ "learning_rate": 1.328732313893245e-06,
13646
+ "loss": 0.1287,
13647
+ "step": 1794
13648
+ },
13649
+ {
13650
+ "epoch": 0.850912538516236,
13651
+ "grad_norm": 4.207308292388916,
13652
+ "learning_rate": 1.320499791152421e-06,
13653
+ "loss": 0.2133,
13654
+ "step": 1795
13655
+ },
13656
+ {
13657
+ "epoch": 0.8513865844986964,
13658
+ "grad_norm": 4.4212141036987305,
13659
+ "learning_rate": 1.3122910478693984e-06,
13660
+ "loss": 0.1429,
13661
+ "step": 1796
13662
+ },
13663
+ {
13664
+ "epoch": 0.8518606304811567,
13665
+ "grad_norm": 4.221442222595215,
13666
+ "learning_rate": 1.30410610653389e-06,
13667
+ "loss": 0.1585,
13668
+ "step": 1797
13669
+ },
13670
+ {
13671
+ "epoch": 0.8523346764636169,
13672
+ "grad_norm": 3.037301778793335,
13673
+ "learning_rate": 1.295944989570398e-06,
13674
+ "loss": 0.1518,
13675
+ "step": 1798
13676
+ },
13677
+ {
13678
+ "epoch": 0.8528087224460773,
13679
+ "grad_norm": 4.325135231018066,
13680
+ "learning_rate": 1.2878077193381511e-06,
13681
+ "loss": 0.1136,
13682
+ "step": 1799
13683
+ },
13684
+ {
13685
+ "epoch": 0.8532827684285376,
13686
+ "grad_norm": 4.634499549865723,
13687
+ "learning_rate": 1.279694318131046e-06,
13688
+ "loss": 0.1592,
13689
+ "step": 1800
13690
+ },
13691
+ {
13692
+ "epoch": 0.8532827684285376,
13693
+ "eval_accuracy": 0.9943639291465378,
13694
+ "eval_f1": 0.9369369369369369,
13695
+ "eval_loss": 0.01357492059469223,
13696
+ "eval_precision": 0.8813559322033898,
13697
+ "eval_recall": 1.0,
13698
+ "eval_runtime": 51.1253,
13699
+ "eval_samples_per_second": 5.301,
13700
+ "eval_steps_per_second": 0.176,
13701
+ "step": 1800
13702
  }
13703
  ],
13704
  "logging_steps": 1,
 
13718
  "attributes": {}
13719
  }
13720
  },
13721
+ "total_flos": 4.8095721145604506e+17,
13722
  "train_batch_size": 8,
13723
  "trial_name": null,
13724
  "trial_params": null