CocoRoF commited on
Commit
a47c5f3
·
verified ·
1 Parent(s): 3e2e45b

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f6fca18b3d6839cfa4f9b00cec6f979a279d6161ccf0e227ea2f0e6664d6d3e
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6621097a97062a5102e67fc29c2bb01fb6549601c085f8f26cce5a1634634ee
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138d6cf3c8fe05fea07df883537101df6a3d38e7d05cbcc03796a983de350576
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e77fb5c2bafed3ee939a6057af4a25e874ed04b715f60fc720a59e7b1d77f2d
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28cdaddb959868042b846248e699766aefc2fadab97732661ad902989f1034df
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b04d6c2e8863bbad65481224e2bdca0706f808989765d4a58e7054f3e5dac5
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f01a643a1ae2b83dd1c19bc6b73325f7e12cc5322058a11111e293dc5b31ae9d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae3ef9777e30c36dff6498b006da1eb150bccee38de6cf7669f386fc977289b
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a90f4546ff0a4d9c836b2695bc4b1ddad6eb64e578565dd4c83c3a0c3672df7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680cdbb58729160d28a1f3cc615b2c063f7c72522976ed4abf05aaf19f07acb8
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:649b5fa0e92e74982a79e3759794b1cfec60cf9441738902668d54e2ffe1767b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c050129d4c78b5963b9d24ede87255fc330819afb083e880a4ab6391077de6
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9e8b7d006141b3943e31b1b95143c70d5c410839f60e8892c3ebb5474fa5b82
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95927238adba95b6358a20dd1852c4905066c03f5b6f24857e2f6c82bb9f0977
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab8e9d82889b9d58c21adc3199b61dc25e089ed0456cd04a5834b8213920db8d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7431d23b9b61660f73b469a3e73ae62f971d8bd2ced76f239fd78258fe40a803
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d61bbe5a4669c770dea677fdd22d95a5f9a1874c146a203a6de6b923066699e2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bdcc55ac2d89ab7fac22a6eb989c2be897f201356182513ed90bc09a5326786
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:208e36b51f1fe5107b8000b99406d4ff1bd7e95578591bc1f581b4593f80e4c6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae3da8149789ffc686c284e85fc275d996d793ce0edd8fa2949c1a21a4de8c9
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81e0e2c967dab9f9c48f59c1d3cd0a40f676964ec54c91035ecabb3e1c2f4b45
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708acca42b057f68ded76410daaf0ceaf94be65729403bb2d72b15a907559585
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.497656982193065,
5
  "eval_steps": 250,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6119,6 +6119,770 @@
6119
  "eval_spearman_manhattan": 0.7409361299302836,
6120
  "eval_steps_per_second": 8.407,
6121
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6122
  }
6123
  ],
6124
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.434864104967197,
5
  "eval_steps": 250,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6119
  "eval_spearman_manhattan": 0.7409361299302836,
6120
  "eval_steps_per_second": 8.407,
6121
  "step": 8000
6122
+ },
6123
+ {
6124
+ "epoch": 7.507029053420806,
6125
+ "grad_norm": 0.443439781665802,
6126
+ "learning_rate": 9.941352194350481e-06,
6127
+ "loss": 0.0301,
6128
+ "step": 8010
6129
+ },
6130
+ {
6131
+ "epoch": 7.516401124648548,
6132
+ "grad_norm": 0.5801528692245483,
6133
+ "learning_rate": 9.941278976116214e-06,
6134
+ "loss": 0.0379,
6135
+ "step": 8020
6136
+ },
6137
+ {
6138
+ "epoch": 7.525773195876289,
6139
+ "grad_norm": 0.9093418717384338,
6140
+ "learning_rate": 9.941205757881943e-06,
6141
+ "loss": 0.0376,
6142
+ "step": 8030
6143
+ },
6144
+ {
6145
+ "epoch": 7.5351452671040295,
6146
+ "grad_norm": 0.7593823671340942,
6147
+ "learning_rate": 9.941132539647674e-06,
6148
+ "loss": 0.0444,
6149
+ "step": 8040
6150
+ },
6151
+ {
6152
+ "epoch": 7.544517338331771,
6153
+ "grad_norm": 0.706062376499176,
6154
+ "learning_rate": 9.941059321413406e-06,
6155
+ "loss": 0.0365,
6156
+ "step": 8050
6157
+ },
6158
+ {
6159
+ "epoch": 7.553889409559512,
6160
+ "grad_norm": 0.9754658937454224,
6161
+ "learning_rate": 9.940986103179137e-06,
6162
+ "loss": 0.0333,
6163
+ "step": 8060
6164
+ },
6165
+ {
6166
+ "epoch": 7.563261480787254,
6167
+ "grad_norm": 0.8546915054321289,
6168
+ "learning_rate": 9.940912884944867e-06,
6169
+ "loss": 0.0365,
6170
+ "step": 8070
6171
+ },
6172
+ {
6173
+ "epoch": 7.572633552014995,
6174
+ "grad_norm": 1.0958435535430908,
6175
+ "learning_rate": 9.940839666710598e-06,
6176
+ "loss": 0.0371,
6177
+ "step": 8080
6178
+ },
6179
+ {
6180
+ "epoch": 7.582005623242736,
6181
+ "grad_norm": 0.9083812832832336,
6182
+ "learning_rate": 9.940766448476329e-06,
6183
+ "loss": 0.0355,
6184
+ "step": 8090
6185
+ },
6186
+ {
6187
+ "epoch": 7.591377694470478,
6188
+ "grad_norm": 0.8183301091194153,
6189
+ "learning_rate": 9.94069323024206e-06,
6190
+ "loss": 0.0366,
6191
+ "step": 8100
6192
+ },
6193
+ {
6194
+ "epoch": 7.600749765698219,
6195
+ "grad_norm": 1.1571640968322754,
6196
+ "learning_rate": 9.94062001200779e-06,
6197
+ "loss": 0.0357,
6198
+ "step": 8110
6199
+ },
6200
+ {
6201
+ "epoch": 7.610121836925961,
6202
+ "grad_norm": 0.47001174092292786,
6203
+ "learning_rate": 9.940546793773523e-06,
6204
+ "loss": 0.0366,
6205
+ "step": 8120
6206
+ },
6207
+ {
6208
+ "epoch": 7.619493908153702,
6209
+ "grad_norm": 0.7864421010017395,
6210
+ "learning_rate": 9.940473575539254e-06,
6211
+ "loss": 0.0354,
6212
+ "step": 8130
6213
+ },
6214
+ {
6215
+ "epoch": 7.628865979381443,
6216
+ "grad_norm": 1.7657727003097534,
6217
+ "learning_rate": 9.940400357304984e-06,
6218
+ "loss": 0.0353,
6219
+ "step": 8140
6220
+ },
6221
+ {
6222
+ "epoch": 7.638238050609185,
6223
+ "grad_norm": 0.9494844079017639,
6224
+ "learning_rate": 9.940327139070715e-06,
6225
+ "loss": 0.0358,
6226
+ "step": 8150
6227
+ },
6228
+ {
6229
+ "epoch": 7.647610121836926,
6230
+ "grad_norm": 1.1095364093780518,
6231
+ "learning_rate": 9.940253920836446e-06,
6232
+ "loss": 0.0338,
6233
+ "step": 8160
6234
+ },
6235
+ {
6236
+ "epoch": 7.6569821930646675,
6237
+ "grad_norm": 0.5973043441772461,
6238
+ "learning_rate": 9.940180702602177e-06,
6239
+ "loss": 0.0332,
6240
+ "step": 8170
6241
+ },
6242
+ {
6243
+ "epoch": 7.6663542642924085,
6244
+ "grad_norm": 0.5820950865745544,
6245
+ "learning_rate": 9.940107484367907e-06,
6246
+ "loss": 0.0398,
6247
+ "step": 8180
6248
+ },
6249
+ {
6250
+ "epoch": 7.6757263355201495,
6251
+ "grad_norm": 0.8826543688774109,
6252
+ "learning_rate": 9.94003426613364e-06,
6253
+ "loss": 0.0363,
6254
+ "step": 8190
6255
+ },
6256
+ {
6257
+ "epoch": 7.685098406747891,
6258
+ "grad_norm": 1.2651371955871582,
6259
+ "learning_rate": 9.93996104789937e-06,
6260
+ "loss": 0.041,
6261
+ "step": 8200
6262
+ },
6263
+ {
6264
+ "epoch": 7.694470477975632,
6265
+ "grad_norm": 0.4515238106250763,
6266
+ "learning_rate": 9.9398878296651e-06,
6267
+ "loss": 0.0375,
6268
+ "step": 8210
6269
+ },
6270
+ {
6271
+ "epoch": 7.703842549203374,
6272
+ "grad_norm": 1.2343902587890625,
6273
+ "learning_rate": 9.939814611430832e-06,
6274
+ "loss": 0.0362,
6275
+ "step": 8220
6276
+ },
6277
+ {
6278
+ "epoch": 7.713214620431115,
6279
+ "grad_norm": 0.9942644238471985,
6280
+ "learning_rate": 9.939741393196563e-06,
6281
+ "loss": 0.029,
6282
+ "step": 8230
6283
+ },
6284
+ {
6285
+ "epoch": 7.722586691658856,
6286
+ "grad_norm": 1.327783226966858,
6287
+ "learning_rate": 9.939668174962294e-06,
6288
+ "loss": 0.0392,
6289
+ "step": 8240
6290
+ },
6291
+ {
6292
+ "epoch": 7.731958762886598,
6293
+ "grad_norm": 1.4785791635513306,
6294
+ "learning_rate": 9.939594956728024e-06,
6295
+ "loss": 0.035,
6296
+ "step": 8250
6297
+ },
6298
+ {
6299
+ "epoch": 7.731958762886598,
6300
+ "eval_loss": 0.037988826632499695,
6301
+ "eval_pearson_cosine": 0.7700406312942505,
6302
+ "eval_pearson_dot": 0.7271457314491272,
6303
+ "eval_pearson_euclidean": 0.7288488745689392,
6304
+ "eval_pearson_manhattan": 0.7308281660079956,
6305
+ "eval_runtime": 23.4237,
6306
+ "eval_samples_per_second": 64.038,
6307
+ "eval_spearman_cosine": 0.7690641250527666,
6308
+ "eval_spearman_dot": 0.72759972168602,
6309
+ "eval_spearman_euclidean": 0.7335219335323239,
6310
+ "eval_spearman_manhattan": 0.7351665552942261,
6311
+ "eval_steps_per_second": 8.026,
6312
+ "step": 8250
6313
+ },
6314
+ {
6315
+ "epoch": 7.741330834114339,
6316
+ "grad_norm": 0.9368901252746582,
6317
+ "learning_rate": 9.939521738493755e-06,
6318
+ "loss": 0.0354,
6319
+ "step": 8260
6320
+ },
6321
+ {
6322
+ "epoch": 7.750702905342081,
6323
+ "grad_norm": 0.924701452255249,
6324
+ "learning_rate": 9.939448520259486e-06,
6325
+ "loss": 0.0308,
6326
+ "step": 8270
6327
+ },
6328
+ {
6329
+ "epoch": 7.760074976569822,
6330
+ "grad_norm": 0.6925562620162964,
6331
+ "learning_rate": 9.939375302025217e-06,
6332
+ "loss": 0.0379,
6333
+ "step": 8280
6334
+ },
6335
+ {
6336
+ "epoch": 7.769447047797563,
6337
+ "grad_norm": 1.1450366973876953,
6338
+ "learning_rate": 9.939302083790947e-06,
6339
+ "loss": 0.035,
6340
+ "step": 8290
6341
+ },
6342
+ {
6343
+ "epoch": 7.778819119025305,
6344
+ "grad_norm": 1.4248292446136475,
6345
+ "learning_rate": 9.93922886555668e-06,
6346
+ "loss": 0.0425,
6347
+ "step": 8300
6348
+ },
6349
+ {
6350
+ "epoch": 7.788191190253046,
6351
+ "grad_norm": 1.1555083990097046,
6352
+ "learning_rate": 9.93915564732241e-06,
6353
+ "loss": 0.035,
6354
+ "step": 8310
6355
+ },
6356
+ {
6357
+ "epoch": 7.7975632614807875,
6358
+ "grad_norm": 0.8950551152229309,
6359
+ "learning_rate": 9.93908242908814e-06,
6360
+ "loss": 0.0371,
6361
+ "step": 8320
6362
+ },
6363
+ {
6364
+ "epoch": 7.8069353327085285,
6365
+ "grad_norm": 0.9402216076850891,
6366
+ "learning_rate": 9.939009210853872e-06,
6367
+ "loss": 0.0325,
6368
+ "step": 8330
6369
+ },
6370
+ {
6371
+ "epoch": 7.816307403936269,
6372
+ "grad_norm": 0.7723280191421509,
6373
+ "learning_rate": 9.938935992619603e-06,
6374
+ "loss": 0.0335,
6375
+ "step": 8340
6376
+ },
6377
+ {
6378
+ "epoch": 7.825679475164011,
6379
+ "grad_norm": 1.1138160228729248,
6380
+ "learning_rate": 9.938862774385334e-06,
6381
+ "loss": 0.0392,
6382
+ "step": 8350
6383
+ },
6384
+ {
6385
+ "epoch": 7.835051546391752,
6386
+ "grad_norm": 1.1937012672424316,
6387
+ "learning_rate": 9.938789556151064e-06,
6388
+ "loss": 0.0349,
6389
+ "step": 8360
6390
+ },
6391
+ {
6392
+ "epoch": 7.844423617619494,
6393
+ "grad_norm": 0.8927692174911499,
6394
+ "learning_rate": 9.938716337916797e-06,
6395
+ "loss": 0.0339,
6396
+ "step": 8370
6397
+ },
6398
+ {
6399
+ "epoch": 7.853795688847235,
6400
+ "grad_norm": 1.1513832807540894,
6401
+ "learning_rate": 9.938643119682526e-06,
6402
+ "loss": 0.039,
6403
+ "step": 8380
6404
+ },
6405
+ {
6406
+ "epoch": 7.863167760074976,
6407
+ "grad_norm": 0.6757535338401794,
6408
+ "learning_rate": 9.938569901448257e-06,
6409
+ "loss": 0.0331,
6410
+ "step": 8390
6411
+ },
6412
+ {
6413
+ "epoch": 7.872539831302718,
6414
+ "grad_norm": 0.64778071641922,
6415
+ "learning_rate": 9.938496683213989e-06,
6416
+ "loss": 0.0357,
6417
+ "step": 8400
6418
+ },
6419
+ {
6420
+ "epoch": 7.881911902530459,
6421
+ "grad_norm": 0.8938049674034119,
6422
+ "learning_rate": 9.93842346497972e-06,
6423
+ "loss": 0.0342,
6424
+ "step": 8410
6425
+ },
6426
+ {
6427
+ "epoch": 7.891283973758201,
6428
+ "grad_norm": 1.0501271486282349,
6429
+ "learning_rate": 9.93835024674545e-06,
6430
+ "loss": 0.0335,
6431
+ "step": 8420
6432
+ },
6433
+ {
6434
+ "epoch": 7.900656044985942,
6435
+ "grad_norm": 0.8977199792861938,
6436
+ "learning_rate": 9.938277028511181e-06,
6437
+ "loss": 0.0352,
6438
+ "step": 8430
6439
+ },
6440
+ {
6441
+ "epoch": 7.910028116213683,
6442
+ "grad_norm": 1.1958116292953491,
6443
+ "learning_rate": 9.938203810276912e-06,
6444
+ "loss": 0.0349,
6445
+ "step": 8440
6446
+ },
6447
+ {
6448
+ "epoch": 7.919400187441425,
6449
+ "grad_norm": 0.9677138328552246,
6450
+ "learning_rate": 9.938130592042643e-06,
6451
+ "loss": 0.0368,
6452
+ "step": 8450
6453
+ },
6454
+ {
6455
+ "epoch": 7.928772258669166,
6456
+ "grad_norm": 0.6786054372787476,
6457
+ "learning_rate": 9.938057373808374e-06,
6458
+ "loss": 0.0312,
6459
+ "step": 8460
6460
+ },
6461
+ {
6462
+ "epoch": 7.938144329896907,
6463
+ "grad_norm": 0.8180833458900452,
6464
+ "learning_rate": 9.937984155574106e-06,
6465
+ "loss": 0.0351,
6466
+ "step": 8470
6467
+ },
6468
+ {
6469
+ "epoch": 7.947516401124648,
6470
+ "grad_norm": 0.9622411727905273,
6471
+ "learning_rate": 9.937910937339837e-06,
6472
+ "loss": 0.0312,
6473
+ "step": 8480
6474
+ },
6475
+ {
6476
+ "epoch": 7.956888472352389,
6477
+ "grad_norm": 0.7947582006454468,
6478
+ "learning_rate": 9.937837719105566e-06,
6479
+ "loss": 0.0309,
6480
+ "step": 8490
6481
+ },
6482
+ {
6483
+ "epoch": 7.966260543580131,
6484
+ "grad_norm": 0.663296103477478,
6485
+ "learning_rate": 9.937764500871298e-06,
6486
+ "loss": 0.0361,
6487
+ "step": 8500
6488
+ },
6489
+ {
6490
+ "epoch": 7.966260543580131,
6491
+ "eval_loss": 0.03769104555249214,
6492
+ "eval_pearson_cosine": 0.7716894745826721,
6493
+ "eval_pearson_dot": 0.7308681011199951,
6494
+ "eval_pearson_euclidean": 0.7253518104553223,
6495
+ "eval_pearson_manhattan": 0.727583646774292,
6496
+ "eval_runtime": 21.789,
6497
+ "eval_samples_per_second": 68.842,
6498
+ "eval_spearman_cosine": 0.7708559308843369,
6499
+ "eval_spearman_dot": 0.7317227014854395,
6500
+ "eval_spearman_euclidean": 0.729650509473576,
6501
+ "eval_spearman_manhattan": 0.7317616874018321,
6502
+ "eval_steps_per_second": 8.628,
6503
+ "step": 8500
6504
+ },
6505
+ {
6506
+ "epoch": 7.975632614807872,
6507
+ "grad_norm": 0.4781196415424347,
6508
+ "learning_rate": 9.937691282637029e-06,
6509
+ "loss": 0.0322,
6510
+ "step": 8510
6511
+ },
6512
+ {
6513
+ "epoch": 7.985004686035614,
6514
+ "grad_norm": 1.5688908100128174,
6515
+ "learning_rate": 9.93761806440276e-06,
6516
+ "loss": 0.0385,
6517
+ "step": 8520
6518
+ },
6519
+ {
6520
+ "epoch": 7.994376757263355,
6521
+ "grad_norm": 0.9491916298866272,
6522
+ "learning_rate": 9.93754484616849e-06,
6523
+ "loss": 0.0349,
6524
+ "step": 8530
6525
+ },
6526
+ {
6527
+ "epoch": 8.003748828491096,
6528
+ "grad_norm": 0.5889357924461365,
6529
+ "learning_rate": 9.937471627934221e-06,
6530
+ "loss": 0.0282,
6531
+ "step": 8540
6532
+ },
6533
+ {
6534
+ "epoch": 8.013120899718837,
6535
+ "grad_norm": 0.7906449437141418,
6536
+ "learning_rate": 9.937398409699952e-06,
6537
+ "loss": 0.0236,
6538
+ "step": 8550
6539
+ },
6540
+ {
6541
+ "epoch": 8.02249297094658,
6542
+ "grad_norm": 1.4013662338256836,
6543
+ "learning_rate": 9.937325191465683e-06,
6544
+ "loss": 0.0303,
6545
+ "step": 8560
6546
+ },
6547
+ {
6548
+ "epoch": 8.03186504217432,
6549
+ "grad_norm": 1.186049461364746,
6550
+ "learning_rate": 9.937251973231414e-06,
6551
+ "loss": 0.0283,
6552
+ "step": 8570
6553
+ },
6554
+ {
6555
+ "epoch": 8.041237113402062,
6556
+ "grad_norm": 0.9762454628944397,
6557
+ "learning_rate": 9.937178754997146e-06,
6558
+ "loss": 0.0235,
6559
+ "step": 8580
6560
+ },
6561
+ {
6562
+ "epoch": 8.050609184629803,
6563
+ "grad_norm": 0.8854254484176636,
6564
+ "learning_rate": 9.937105536762877e-06,
6565
+ "loss": 0.0269,
6566
+ "step": 8590
6567
+ },
6568
+ {
6569
+ "epoch": 8.059981255857544,
6570
+ "grad_norm": 1.2090007066726685,
6571
+ "learning_rate": 9.937032318528607e-06,
6572
+ "loss": 0.0254,
6573
+ "step": 8600
6574
+ },
6575
+ {
6576
+ "epoch": 8.069353327085286,
6577
+ "grad_norm": 0.5176217555999756,
6578
+ "learning_rate": 9.936959100294338e-06,
6579
+ "loss": 0.0317,
6580
+ "step": 8610
6581
+ },
6582
+ {
6583
+ "epoch": 8.078725398313027,
6584
+ "grad_norm": 0.4938619136810303,
6585
+ "learning_rate": 9.936885882060069e-06,
6586
+ "loss": 0.0245,
6587
+ "step": 8620
6588
+ },
6589
+ {
6590
+ "epoch": 8.088097469540768,
6591
+ "grad_norm": 1.6035066843032837,
6592
+ "learning_rate": 9.9368126638258e-06,
6593
+ "loss": 0.0296,
6594
+ "step": 8630
6595
+ },
6596
+ {
6597
+ "epoch": 8.09746954076851,
6598
+ "grad_norm": 0.6895983815193176,
6599
+ "learning_rate": 9.93673944559153e-06,
6600
+ "loss": 0.0292,
6601
+ "step": 8640
6602
+ },
6603
+ {
6604
+ "epoch": 8.10684161199625,
6605
+ "grad_norm": 0.6980400085449219,
6606
+ "learning_rate": 9.936666227357263e-06,
6607
+ "loss": 0.0299,
6608
+ "step": 8650
6609
+ },
6610
+ {
6611
+ "epoch": 8.116213683223993,
6612
+ "grad_norm": 1.0714101791381836,
6613
+ "learning_rate": 9.936593009122992e-06,
6614
+ "loss": 0.0258,
6615
+ "step": 8660
6616
+ },
6617
+ {
6618
+ "epoch": 8.125585754451734,
6619
+ "grad_norm": 0.6729503273963928,
6620
+ "learning_rate": 9.936519790888723e-06,
6621
+ "loss": 0.0279,
6622
+ "step": 8670
6623
+ },
6624
+ {
6625
+ "epoch": 8.134957825679475,
6626
+ "grad_norm": 0.8938456177711487,
6627
+ "learning_rate": 9.936446572654455e-06,
6628
+ "loss": 0.0245,
6629
+ "step": 8680
6630
+ },
6631
+ {
6632
+ "epoch": 8.144329896907216,
6633
+ "grad_norm": 1.2066154479980469,
6634
+ "learning_rate": 9.936373354420186e-06,
6635
+ "loss": 0.0334,
6636
+ "step": 8690
6637
+ },
6638
+ {
6639
+ "epoch": 8.153701968134957,
6640
+ "grad_norm": 0.7639226913452148,
6641
+ "learning_rate": 9.936300136185917e-06,
6642
+ "loss": 0.0245,
6643
+ "step": 8700
6644
+ },
6645
+ {
6646
+ "epoch": 8.1630740393627,
6647
+ "grad_norm": 1.4429128170013428,
6648
+ "learning_rate": 9.936226917951647e-06,
6649
+ "loss": 0.0278,
6650
+ "step": 8710
6651
+ },
6652
+ {
6653
+ "epoch": 8.17244611059044,
6654
+ "grad_norm": 0.8992042541503906,
6655
+ "learning_rate": 9.936153699717378e-06,
6656
+ "loss": 0.0267,
6657
+ "step": 8720
6658
+ },
6659
+ {
6660
+ "epoch": 8.181818181818182,
6661
+ "grad_norm": 0.598173975944519,
6662
+ "learning_rate": 9.936080481483109e-06,
6663
+ "loss": 0.0258,
6664
+ "step": 8730
6665
+ },
6666
+ {
6667
+ "epoch": 8.191190253045923,
6668
+ "grad_norm": 0.42205601930618286,
6669
+ "learning_rate": 9.93600726324884e-06,
6670
+ "loss": 0.0323,
6671
+ "step": 8740
6672
+ },
6673
+ {
6674
+ "epoch": 8.200562324273664,
6675
+ "grad_norm": 0.584039568901062,
6676
+ "learning_rate": 9.935934045014572e-06,
6677
+ "loss": 0.0224,
6678
+ "step": 8750
6679
+ },
6680
+ {
6681
+ "epoch": 8.200562324273664,
6682
+ "eval_loss": 0.037737876176834106,
6683
+ "eval_pearson_cosine": 0.7710561156272888,
6684
+ "eval_pearson_dot": 0.7243790626525879,
6685
+ "eval_pearson_euclidean": 0.7310018539428711,
6686
+ "eval_pearson_manhattan": 0.7328372001647949,
6687
+ "eval_runtime": 24.3532,
6688
+ "eval_samples_per_second": 61.593,
6689
+ "eval_spearman_cosine": 0.7703050511110383,
6690
+ "eval_spearman_dot": 0.725368343860831,
6691
+ "eval_spearman_euclidean": 0.7355669919591825,
6692
+ "eval_spearman_manhattan": 0.7369211933770833,
6693
+ "eval_steps_per_second": 7.72,
6694
+ "step": 8750
6695
+ },
6696
+ {
6697
+ "epoch": 8.209934395501406,
6698
+ "grad_norm": 0.8525517582893372,
6699
+ "learning_rate": 9.935860826780303e-06,
6700
+ "loss": 0.0268,
6701
+ "step": 8760
6702
+ },
6703
+ {
6704
+ "epoch": 8.219306466729147,
6705
+ "grad_norm": 0.7080439329147339,
6706
+ "learning_rate": 9.935787608546034e-06,
6707
+ "loss": 0.0237,
6708
+ "step": 8770
6709
+ },
6710
+ {
6711
+ "epoch": 8.228678537956888,
6712
+ "grad_norm": 0.7084332704544067,
6713
+ "learning_rate": 9.935714390311764e-06,
6714
+ "loss": 0.0232,
6715
+ "step": 8780
6716
+ },
6717
+ {
6718
+ "epoch": 8.23805060918463,
6719
+ "grad_norm": 1.2140733003616333,
6720
+ "learning_rate": 9.935641172077495e-06,
6721
+ "loss": 0.028,
6722
+ "step": 8790
6723
+ },
6724
+ {
6725
+ "epoch": 8.24742268041237,
6726
+ "grad_norm": 0.6614952087402344,
6727
+ "learning_rate": 9.935567953843226e-06,
6728
+ "loss": 0.025,
6729
+ "step": 8800
6730
+ },
6731
+ {
6732
+ "epoch": 8.256794751640113,
6733
+ "grad_norm": 0.642755925655365,
6734
+ "learning_rate": 9.935494735608957e-06,
6735
+ "loss": 0.0259,
6736
+ "step": 8810
6737
+ },
6738
+ {
6739
+ "epoch": 8.266166822867854,
6740
+ "grad_norm": 1.1676636934280396,
6741
+ "learning_rate": 9.935421517374687e-06,
6742
+ "loss": 0.0292,
6743
+ "step": 8820
6744
+ },
6745
+ {
6746
+ "epoch": 8.275538894095595,
6747
+ "grad_norm": 0.4561503529548645,
6748
+ "learning_rate": 9.935348299140418e-06,
6749
+ "loss": 0.026,
6750
+ "step": 8830
6751
+ },
6752
+ {
6753
+ "epoch": 8.284910965323336,
6754
+ "grad_norm": 0.5693290829658508,
6755
+ "learning_rate": 9.935275080906149e-06,
6756
+ "loss": 0.0283,
6757
+ "step": 8840
6758
+ },
6759
+ {
6760
+ "epoch": 8.294283036551079,
6761
+ "grad_norm": 1.2574779987335205,
6762
+ "learning_rate": 9.935201862671881e-06,
6763
+ "loss": 0.0275,
6764
+ "step": 8850
6765
+ },
6766
+ {
6767
+ "epoch": 8.30365510777882,
6768
+ "grad_norm": 0.9662300944328308,
6769
+ "learning_rate": 9.935128644437612e-06,
6770
+ "loss": 0.0257,
6771
+ "step": 8860
6772
+ },
6773
+ {
6774
+ "epoch": 8.31302717900656,
6775
+ "grad_norm": 0.5467878580093384,
6776
+ "learning_rate": 9.935055426203343e-06,
6777
+ "loss": 0.0264,
6778
+ "step": 8870
6779
+ },
6780
+ {
6781
+ "epoch": 8.322399250234302,
6782
+ "grad_norm": 1.0672435760498047,
6783
+ "learning_rate": 9.934982207969074e-06,
6784
+ "loss": 0.0334,
6785
+ "step": 8880
6786
+ },
6787
+ {
6788
+ "epoch": 8.331771321462043,
6789
+ "grad_norm": 1.155970573425293,
6790
+ "learning_rate": 9.934908989734804e-06,
6791
+ "loss": 0.029,
6792
+ "step": 8890
6793
+ },
6794
+ {
6795
+ "epoch": 8.341143392689784,
6796
+ "grad_norm": 0.9163686037063599,
6797
+ "learning_rate": 9.934835771500535e-06,
6798
+ "loss": 0.0295,
6799
+ "step": 8900
6800
+ },
6801
+ {
6802
+ "epoch": 8.350515463917526,
6803
+ "grad_norm": 0.6844992637634277,
6804
+ "learning_rate": 9.934762553266266e-06,
6805
+ "loss": 0.0228,
6806
+ "step": 8910
6807
+ },
6808
+ {
6809
+ "epoch": 8.359887535145267,
6810
+ "grad_norm": 0.6449628472328186,
6811
+ "learning_rate": 9.934689335031997e-06,
6812
+ "loss": 0.0272,
6813
+ "step": 8920
6814
+ },
6815
+ {
6816
+ "epoch": 8.369259606373008,
6817
+ "grad_norm": 1.0157432556152344,
6818
+ "learning_rate": 9.934616116797729e-06,
6819
+ "loss": 0.0251,
6820
+ "step": 8930
6821
+ },
6822
+ {
6823
+ "epoch": 8.37863167760075,
6824
+ "grad_norm": 0.9558159112930298,
6825
+ "learning_rate": 9.93454289856346e-06,
6826
+ "loss": 0.0262,
6827
+ "step": 8940
6828
+ },
6829
+ {
6830
+ "epoch": 8.388003748828492,
6831
+ "grad_norm": 1.2592884302139282,
6832
+ "learning_rate": 9.934469680329189e-06,
6833
+ "loss": 0.0317,
6834
+ "step": 8950
6835
+ },
6836
+ {
6837
+ "epoch": 8.397375820056233,
6838
+ "grad_norm": 0.8466887474060059,
6839
+ "learning_rate": 9.934396462094921e-06,
6840
+ "loss": 0.0333,
6841
+ "step": 8960
6842
+ },
6843
+ {
6844
+ "epoch": 8.406747891283974,
6845
+ "grad_norm": 0.8453270792961121,
6846
+ "learning_rate": 9.934323243860652e-06,
6847
+ "loss": 0.0276,
6848
+ "step": 8970
6849
+ },
6850
+ {
6851
+ "epoch": 8.416119962511715,
6852
+ "grad_norm": 0.6024593710899353,
6853
+ "learning_rate": 9.934250025626383e-06,
6854
+ "loss": 0.0269,
6855
+ "step": 8980
6856
+ },
6857
+ {
6858
+ "epoch": 8.425492033739456,
6859
+ "grad_norm": 0.8663728833198547,
6860
+ "learning_rate": 9.934176807392114e-06,
6861
+ "loss": 0.0289,
6862
+ "step": 8990
6863
+ },
6864
+ {
6865
+ "epoch": 8.434864104967197,
6866
+ "grad_norm": 0.8765361905097961,
6867
+ "learning_rate": 9.934103589157846e-06,
6868
+ "loss": 0.0256,
6869
+ "step": 9000
6870
+ },
6871
+ {
6872
+ "epoch": 8.434864104967197,
6873
+ "eval_loss": 0.038624610751867294,
6874
+ "eval_pearson_cosine": 0.7652055025100708,
6875
+ "eval_pearson_dot": 0.7185550928115845,
6876
+ "eval_pearson_euclidean": 0.7254422903060913,
6877
+ "eval_pearson_manhattan": 0.7273893356323242,
6878
+ "eval_runtime": 25.8439,
6879
+ "eval_samples_per_second": 58.041,
6880
+ "eval_spearman_cosine": 0.7646832614130892,
6881
+ "eval_spearman_dot": 0.7190565869110545,
6882
+ "eval_spearman_euclidean": 0.7303235144121284,
6883
+ "eval_spearman_manhattan": 0.7319318616566108,
6884
+ "eval_steps_per_second": 7.274,
6885
+ "step": 9000
6886
  }
6887
  ],
6888
  "logging_steps": 10,