CocoRoF commited on
Commit
7350bc0
·
verified ·
1 Parent(s): 6e662dc

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:320c1a6e527bda174c669c1f951ba0d8511688825b12496cdd957e88966c44d9
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f6fca18b3d6839cfa4f9b00cec6f979a279d6161ccf0e227ea2f0e6664d6d3e
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57f4bc2a3c4abe8dda39abccfe59caa42f3c3826a1837f4bf0c5399a3526b447
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138d6cf3c8fe05fea07df883537101df6a3d38e7d05cbcc03796a983de350576
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6677d2f94c1eeef9fd86043f9720c0ec154224510969ab790d1983791f375b88
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cdaddb959868042b846248e699766aefc2fadab97732661ad902989f1034df
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4c2eafdb8010cffd636a540805e21c2ed7a900fa295f85b31d51d1a42084ef2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01a643a1ae2b83dd1c19bc6b73325f7e12cc5322058a11111e293dc5b31ae9d
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:243c3ab92fec11f8d6189a1ee0c6563a7d0fa0286f6142dbd247bcce148d357d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a90f4546ff0a4d9c836b2695bc4b1ddad6eb64e578565dd4c83c3a0c3672df7
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12c16967790d71b2e48b6adefaa270162917f0f86335d85480de9fb6860c7492
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649b5fa0e92e74982a79e3759794b1cfec60cf9441738902668d54e2ffe1767b
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:369fdbde0f14abf383325d06785d666884716d0228d7dc8c5e50c13cfb424b27
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e8b7d006141b3943e31b1b95143c70d5c410839f60e8892c3ebb5474fa5b82
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb8ed41d9a3d876261e27e3c4c409c9b564db5930687ae4ec83f98c5023be0a7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8e9d82889b9d58c21adc3199b61dc25e089ed0456cd04a5834b8213920db8d
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a76fd0f99241be8ebb729e7e60a6572e44b68c9f795cc3e8947a8ca4fa56c56
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d61bbe5a4669c770dea677fdd22d95a5f9a1874c146a203a6de6b923066699e2
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45c51dab87a7b7a65f8d23876cdf72e77eeca4ff50002d44cd0db1ecc3a43a63
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208e36b51f1fe5107b8000b99406d4ff1bd7e95578591bc1f581b4593f80e4c6
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2dc388ca3bbc2159f39c9ce16c2f601ff32a9453701746d70cc6d36087ab16b
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e0e2c967dab9f9c48f59c1d3cd0a40f676964ec54c91035ecabb3e1c2f4b45
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.560449859418932,
5
  "eval_steps": 250,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5355,6 +5355,770 @@
5355
  "eval_spearman_manhattan": 0.7397995971405482,
5356
  "eval_steps_per_second": 8.263,
5357
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5358
  }
5359
  ],
5360
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.497656982193065,
5
  "eval_steps": 250,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5355
  "eval_spearman_manhattan": 0.7397995971405482,
5356
  "eval_steps_per_second": 8.263,
5357
  "step": 7000
5358
+ },
5359
+ {
5360
+ "epoch": 6.569821930646673,
5361
+ "grad_norm": 1.1269482374191284,
5362
+ "learning_rate": 9.948674017777388e-06,
5363
+ "loss": 0.0395,
5364
+ "step": 7010
5365
+ },
5366
+ {
5367
+ "epoch": 6.579194001874415,
5368
+ "grad_norm": 0.8978859782218933,
5369
+ "learning_rate": 9.948600799543118e-06,
5370
+ "loss": 0.0438,
5371
+ "step": 7020
5372
+ },
5373
+ {
5374
+ "epoch": 6.588566073102156,
5375
+ "grad_norm": 1.3999450206756592,
5376
+ "learning_rate": 9.94852758130885e-06,
5377
+ "loss": 0.0466,
5378
+ "step": 7030
5379
+ },
5380
+ {
5381
+ "epoch": 6.597938144329897,
5382
+ "grad_norm": 0.985998272895813,
5383
+ "learning_rate": 9.948454363074582e-06,
5384
+ "loss": 0.0474,
5385
+ "step": 7040
5386
+ },
5387
+ {
5388
+ "epoch": 6.607310215557638,
5389
+ "grad_norm": 0.7843828797340393,
5390
+ "learning_rate": 9.948381144840312e-06,
5391
+ "loss": 0.0417,
5392
+ "step": 7050
5393
+ },
5394
+ {
5395
+ "epoch": 6.616682286785379,
5396
+ "grad_norm": 1.64656400680542,
5397
+ "learning_rate": 9.948307926606043e-06,
5398
+ "loss": 0.045,
5399
+ "step": 7060
5400
+ },
5401
+ {
5402
+ "epoch": 6.626054358013121,
5403
+ "grad_norm": 0.6348075866699219,
5404
+ "learning_rate": 9.948234708371774e-06,
5405
+ "loss": 0.0501,
5406
+ "step": 7070
5407
+ },
5408
+ {
5409
+ "epoch": 6.635426429240862,
5410
+ "grad_norm": 1.8781590461730957,
5411
+ "learning_rate": 9.948161490137505e-06,
5412
+ "loss": 0.0445,
5413
+ "step": 7080
5414
+ },
5415
+ {
5416
+ "epoch": 6.644798500468603,
5417
+ "grad_norm": 1.0441402196884155,
5418
+ "learning_rate": 9.948088271903235e-06,
5419
+ "loss": 0.0457,
5420
+ "step": 7090
5421
+ },
5422
+ {
5423
+ "epoch": 6.654170571696345,
5424
+ "grad_norm": 1.2460689544677734,
5425
+ "learning_rate": 9.948015053668966e-06,
5426
+ "loss": 0.0471,
5427
+ "step": 7100
5428
+ },
5429
+ {
5430
+ "epoch": 6.663542642924086,
5431
+ "grad_norm": 0.993414580821991,
5432
+ "learning_rate": 9.947941835434698e-06,
5433
+ "loss": 0.0423,
5434
+ "step": 7110
5435
+ },
5436
+ {
5437
+ "epoch": 6.672914714151828,
5438
+ "grad_norm": 1.2848552465438843,
5439
+ "learning_rate": 9.947868617200428e-06,
5440
+ "loss": 0.0414,
5441
+ "step": 7120
5442
+ },
5443
+ {
5444
+ "epoch": 6.682286785379569,
5445
+ "grad_norm": 1.2903103828430176,
5446
+ "learning_rate": 9.947795398966158e-06,
5447
+ "loss": 0.0402,
5448
+ "step": 7130
5449
+ },
5450
+ {
5451
+ "epoch": 6.69165885660731,
5452
+ "grad_norm": 1.2319235801696777,
5453
+ "learning_rate": 9.94772218073189e-06,
5454
+ "loss": 0.0504,
5455
+ "step": 7140
5456
+ },
5457
+ {
5458
+ "epoch": 6.701030927835052,
5459
+ "grad_norm": 0.8465273976325989,
5460
+ "learning_rate": 9.947648962497621e-06,
5461
+ "loss": 0.0409,
5462
+ "step": 7150
5463
+ },
5464
+ {
5465
+ "epoch": 6.710402999062793,
5466
+ "grad_norm": 1.186928153038025,
5467
+ "learning_rate": 9.947575744263352e-06,
5468
+ "loss": 0.0458,
5469
+ "step": 7160
5470
+ },
5471
+ {
5472
+ "epoch": 6.719775070290535,
5473
+ "grad_norm": 1.3528752326965332,
5474
+ "learning_rate": 9.947502526029083e-06,
5475
+ "loss": 0.0433,
5476
+ "step": 7170
5477
+ },
5478
+ {
5479
+ "epoch": 6.7291471415182755,
5480
+ "grad_norm": 0.8908892273902893,
5481
+ "learning_rate": 9.947429307794814e-06,
5482
+ "loss": 0.0456,
5483
+ "step": 7180
5484
+ },
5485
+ {
5486
+ "epoch": 6.7385192127460165,
5487
+ "grad_norm": 1.1235069036483765,
5488
+ "learning_rate": 9.947356089560544e-06,
5489
+ "loss": 0.0481,
5490
+ "step": 7190
5491
+ },
5492
+ {
5493
+ "epoch": 6.747891283973758,
5494
+ "grad_norm": 1.6809895038604736,
5495
+ "learning_rate": 9.947282871326275e-06,
5496
+ "loss": 0.0454,
5497
+ "step": 7200
5498
+ },
5499
+ {
5500
+ "epoch": 6.757263355201499,
5501
+ "grad_norm": 0.8632039427757263,
5502
+ "learning_rate": 9.947209653092008e-06,
5503
+ "loss": 0.0481,
5504
+ "step": 7210
5505
+ },
5506
+ {
5507
+ "epoch": 6.766635426429241,
5508
+ "grad_norm": 1.2185996770858765,
5509
+ "learning_rate": 9.947136434857738e-06,
5510
+ "loss": 0.0383,
5511
+ "step": 7220
5512
+ },
5513
+ {
5514
+ "epoch": 6.776007497656982,
5515
+ "grad_norm": 0.6979696154594421,
5516
+ "learning_rate": 9.947063216623467e-06,
5517
+ "loss": 0.0435,
5518
+ "step": 7230
5519
+ },
5520
+ {
5521
+ "epoch": 6.785379568884723,
5522
+ "grad_norm": 1.459441065788269,
5523
+ "learning_rate": 9.9469899983892e-06,
5524
+ "loss": 0.0449,
5525
+ "step": 7240
5526
+ },
5527
+ {
5528
+ "epoch": 6.794751640112465,
5529
+ "grad_norm": 1.0957977771759033,
5530
+ "learning_rate": 9.94691678015493e-06,
5531
+ "loss": 0.032,
5532
+ "step": 7250
5533
+ },
5534
+ {
5535
+ "epoch": 6.794751640112465,
5536
+ "eval_loss": 0.03765299916267395,
5537
+ "eval_pearson_cosine": 0.7692482471466064,
5538
+ "eval_pearson_dot": 0.722366452217102,
5539
+ "eval_pearson_euclidean": 0.7316011190414429,
5540
+ "eval_pearson_manhattan": 0.7333144545555115,
5541
+ "eval_runtime": 22.5438,
5542
+ "eval_samples_per_second": 66.537,
5543
+ "eval_spearman_cosine": 0.7695046405395065,
5544
+ "eval_spearman_dot": 0.7242050912795406,
5545
+ "eval_spearman_euclidean": 0.7356828429817377,
5546
+ "eval_spearman_manhattan": 0.737487116385034,
5547
+ "eval_steps_per_second": 8.339,
5548
+ "step": 7250
5549
+ },
5550
+ {
5551
+ "epoch": 6.804123711340206,
5552
+ "grad_norm": 1.377066731452942,
5553
+ "learning_rate": 9.946843561920661e-06,
5554
+ "loss": 0.0529,
5555
+ "step": 7260
5556
+ },
5557
+ {
5558
+ "epoch": 6.813495782567948,
5559
+ "grad_norm": 0.714728057384491,
5560
+ "learning_rate": 9.946770343686392e-06,
5561
+ "loss": 0.0432,
5562
+ "step": 7270
5563
+ },
5564
+ {
5565
+ "epoch": 6.822867853795689,
5566
+ "grad_norm": 1.4324384927749634,
5567
+ "learning_rate": 9.946697125452125e-06,
5568
+ "loss": 0.046,
5569
+ "step": 7280
5570
+ },
5571
+ {
5572
+ "epoch": 6.83223992502343,
5573
+ "grad_norm": 1.2564704418182373,
5574
+ "learning_rate": 9.946623907217854e-06,
5575
+ "loss": 0.046,
5576
+ "step": 7290
5577
+ },
5578
+ {
5579
+ "epoch": 6.841611996251172,
5580
+ "grad_norm": 0.8522197008132935,
5581
+ "learning_rate": 9.946550688983584e-06,
5582
+ "loss": 0.0393,
5583
+ "step": 7300
5584
+ },
5585
+ {
5586
+ "epoch": 6.850984067478913,
5587
+ "grad_norm": 0.8751912117004395,
5588
+ "learning_rate": 9.946477470749317e-06,
5589
+ "loss": 0.0426,
5590
+ "step": 7310
5591
+ },
5592
+ {
5593
+ "epoch": 6.8603561387066545,
5594
+ "grad_norm": 0.8960391879081726,
5595
+ "learning_rate": 9.946404252515048e-06,
5596
+ "loss": 0.0445,
5597
+ "step": 7320
5598
+ },
5599
+ {
5600
+ "epoch": 6.8697282099343955,
5601
+ "grad_norm": 1.092128872871399,
5602
+ "learning_rate": 9.946331034280778e-06,
5603
+ "loss": 0.0459,
5604
+ "step": 7330
5605
+ },
5606
+ {
5607
+ "epoch": 6.8791002811621365,
5608
+ "grad_norm": 1.1840777397155762,
5609
+ "learning_rate": 9.946257816046509e-06,
5610
+ "loss": 0.0387,
5611
+ "step": 7340
5612
+ },
5613
+ {
5614
+ "epoch": 6.888472352389878,
5615
+ "grad_norm": 1.0283764600753784,
5616
+ "learning_rate": 9.94618459781224e-06,
5617
+ "loss": 0.0577,
5618
+ "step": 7350
5619
+ },
5620
+ {
5621
+ "epoch": 6.897844423617619,
5622
+ "grad_norm": 0.749761164188385,
5623
+ "learning_rate": 9.94611137957797e-06,
5624
+ "loss": 0.0414,
5625
+ "step": 7360
5626
+ },
5627
+ {
5628
+ "epoch": 6.907216494845361,
5629
+ "grad_norm": 0.8442000150680542,
5630
+ "learning_rate": 9.946038161343701e-06,
5631
+ "loss": 0.046,
5632
+ "step": 7370
5633
+ },
5634
+ {
5635
+ "epoch": 6.916588566073102,
5636
+ "grad_norm": 1.2296583652496338,
5637
+ "learning_rate": 9.945964943109432e-06,
5638
+ "loss": 0.0412,
5639
+ "step": 7380
5640
+ },
5641
+ {
5642
+ "epoch": 6.925960637300843,
5643
+ "grad_norm": 0.6515626311302185,
5644
+ "learning_rate": 9.945891724875165e-06,
5645
+ "loss": 0.0481,
5646
+ "step": 7390
5647
+ },
5648
+ {
5649
+ "epoch": 6.935332708528585,
5650
+ "grad_norm": 1.8992091417312622,
5651
+ "learning_rate": 9.945818506640895e-06,
5652
+ "loss": 0.0431,
5653
+ "step": 7400
5654
+ },
5655
+ {
5656
+ "epoch": 6.944704779756326,
5657
+ "grad_norm": 1.1663875579833984,
5658
+ "learning_rate": 9.945745288406624e-06,
5659
+ "loss": 0.0459,
5660
+ "step": 7410
5661
+ },
5662
+ {
5663
+ "epoch": 6.954076850984068,
5664
+ "grad_norm": 0.6695976853370667,
5665
+ "learning_rate": 9.945672070172357e-06,
5666
+ "loss": 0.0448,
5667
+ "step": 7420
5668
+ },
5669
+ {
5670
+ "epoch": 6.963448922211809,
5671
+ "grad_norm": 1.158563494682312,
5672
+ "learning_rate": 9.945598851938088e-06,
5673
+ "loss": 0.0398,
5674
+ "step": 7430
5675
+ },
5676
+ {
5677
+ "epoch": 6.97282099343955,
5678
+ "grad_norm": 1.2068713903427124,
5679
+ "learning_rate": 9.945525633703818e-06,
5680
+ "loss": 0.0443,
5681
+ "step": 7440
5682
+ },
5683
+ {
5684
+ "epoch": 6.982193064667292,
5685
+ "grad_norm": 0.9688456654548645,
5686
+ "learning_rate": 9.945452415469549e-06,
5687
+ "loss": 0.0452,
5688
+ "step": 7450
5689
+ },
5690
+ {
5691
+ "epoch": 6.991565135895033,
5692
+ "grad_norm": 1.5483156442642212,
5693
+ "learning_rate": 9.94537919723528e-06,
5694
+ "loss": 0.0498,
5695
+ "step": 7460
5696
+ },
5697
+ {
5698
+ "epoch": 7.0009372071227745,
5699
+ "grad_norm": 1.18287193775177,
5700
+ "learning_rate": 9.94530597900101e-06,
5701
+ "loss": 0.0445,
5702
+ "step": 7470
5703
+ },
5704
+ {
5705
+ "epoch": 7.010309278350515,
5706
+ "grad_norm": 0.7765620946884155,
5707
+ "learning_rate": 9.945232760766741e-06,
5708
+ "loss": 0.0346,
5709
+ "step": 7480
5710
+ },
5711
+ {
5712
+ "epoch": 7.019681349578256,
5713
+ "grad_norm": 0.948760986328125,
5714
+ "learning_rate": 9.945159542532474e-06,
5715
+ "loss": 0.0348,
5716
+ "step": 7490
5717
+ },
5718
+ {
5719
+ "epoch": 7.029053420805998,
5720
+ "grad_norm": 0.9965664744377136,
5721
+ "learning_rate": 9.945086324298205e-06,
5722
+ "loss": 0.0342,
5723
+ "step": 7500
5724
+ },
5725
+ {
5726
+ "epoch": 7.029053420805998,
5727
+ "eval_loss": 0.03782695531845093,
5728
+ "eval_pearson_cosine": 0.768491804599762,
5729
+ "eval_pearson_dot": 0.7183945775032043,
5730
+ "eval_pearson_euclidean": 0.7320147752761841,
5731
+ "eval_pearson_manhattan": 0.7333334684371948,
5732
+ "eval_runtime": 21.6515,
5733
+ "eval_samples_per_second": 69.279,
5734
+ "eval_spearman_cosine": 0.7677979499645443,
5735
+ "eval_spearman_dot": 0.7186610110098233,
5736
+ "eval_spearman_euclidean": 0.7364530110375347,
5737
+ "eval_spearman_manhattan": 0.737620665225201,
5738
+ "eval_steps_per_second": 8.683,
5739
+ "step": 7500
5740
+ },
5741
+ {
5742
+ "epoch": 7.038425492033739,
5743
+ "grad_norm": 0.8594346046447754,
5744
+ "learning_rate": 9.945013106063935e-06,
5745
+ "loss": 0.0318,
5746
+ "step": 7510
5747
+ },
5748
+ {
5749
+ "epoch": 7.047797563261481,
5750
+ "grad_norm": 1.62812340259552,
5751
+ "learning_rate": 9.944939887829666e-06,
5752
+ "loss": 0.0414,
5753
+ "step": 7520
5754
+ },
5755
+ {
5756
+ "epoch": 7.057169634489222,
5757
+ "grad_norm": 1.1017098426818848,
5758
+ "learning_rate": 9.944866669595397e-06,
5759
+ "loss": 0.0327,
5760
+ "step": 7530
5761
+ },
5762
+ {
5763
+ "epoch": 7.066541705716963,
5764
+ "grad_norm": 0.8536505699157715,
5765
+ "learning_rate": 9.944793451361128e-06,
5766
+ "loss": 0.0286,
5767
+ "step": 7540
5768
+ },
5769
+ {
5770
+ "epoch": 7.075913776944705,
5771
+ "grad_norm": 1.0389901399612427,
5772
+ "learning_rate": 9.944720233126858e-06,
5773
+ "loss": 0.0365,
5774
+ "step": 7550
5775
+ },
5776
+ {
5777
+ "epoch": 7.085285848172446,
5778
+ "grad_norm": 1.0682491064071655,
5779
+ "learning_rate": 9.94464701489259e-06,
5780
+ "loss": 0.034,
5781
+ "step": 7560
5782
+ },
5783
+ {
5784
+ "epoch": 7.094657919400188,
5785
+ "grad_norm": 0.8786489963531494,
5786
+ "learning_rate": 9.944573796658321e-06,
5787
+ "loss": 0.0373,
5788
+ "step": 7570
5789
+ },
5790
+ {
5791
+ "epoch": 7.104029990627929,
5792
+ "grad_norm": 1.3642008304595947,
5793
+ "learning_rate": 9.94450057842405e-06,
5794
+ "loss": 0.0314,
5795
+ "step": 7580
5796
+ },
5797
+ {
5798
+ "epoch": 7.11340206185567,
5799
+ "grad_norm": 0.7243325114250183,
5800
+ "learning_rate": 9.944427360189783e-06,
5801
+ "loss": 0.0299,
5802
+ "step": 7590
5803
+ },
5804
+ {
5805
+ "epoch": 7.122774133083412,
5806
+ "grad_norm": 0.6696385145187378,
5807
+ "learning_rate": 9.944354141955514e-06,
5808
+ "loss": 0.0311,
5809
+ "step": 7600
5810
+ },
5811
+ {
5812
+ "epoch": 7.1321462043111525,
5813
+ "grad_norm": 1.03152334690094,
5814
+ "learning_rate": 9.944280923721244e-06,
5815
+ "loss": 0.0355,
5816
+ "step": 7610
5817
+ },
5818
+ {
5819
+ "epoch": 7.141518275538894,
5820
+ "grad_norm": 0.8586616516113281,
5821
+ "learning_rate": 9.944207705486975e-06,
5822
+ "loss": 0.0394,
5823
+ "step": 7620
5824
+ },
5825
+ {
5826
+ "epoch": 7.150890346766635,
5827
+ "grad_norm": 0.9514285922050476,
5828
+ "learning_rate": 9.944134487252706e-06,
5829
+ "loss": 0.035,
5830
+ "step": 7630
5831
+ },
5832
+ {
5833
+ "epoch": 7.160262417994376,
5834
+ "grad_norm": 0.8053460717201233,
5835
+ "learning_rate": 9.944061269018437e-06,
5836
+ "loss": 0.0312,
5837
+ "step": 7640
5838
+ },
5839
+ {
5840
+ "epoch": 7.169634489222118,
5841
+ "grad_norm": 1.0056674480438232,
5842
+ "learning_rate": 9.943988050784167e-06,
5843
+ "loss": 0.0371,
5844
+ "step": 7650
5845
+ },
5846
+ {
5847
+ "epoch": 7.179006560449859,
5848
+ "grad_norm": 0.7738359570503235,
5849
+ "learning_rate": 9.943914832549898e-06,
5850
+ "loss": 0.0302,
5851
+ "step": 7660
5852
+ },
5853
+ {
5854
+ "epoch": 7.188378631677601,
5855
+ "grad_norm": 1.039197325706482,
5856
+ "learning_rate": 9.94384161431563e-06,
5857
+ "loss": 0.0316,
5858
+ "step": 7670
5859
+ },
5860
+ {
5861
+ "epoch": 7.197750702905342,
5862
+ "grad_norm": 1.578165888786316,
5863
+ "learning_rate": 9.943768396081361e-06,
5864
+ "loss": 0.0388,
5865
+ "step": 7680
5866
+ },
5867
+ {
5868
+ "epoch": 7.207122774133083,
5869
+ "grad_norm": 1.1753205060958862,
5870
+ "learning_rate": 9.943695177847092e-06,
5871
+ "loss": 0.0387,
5872
+ "step": 7690
5873
+ },
5874
+ {
5875
+ "epoch": 7.216494845360825,
5876
+ "grad_norm": 1.295299768447876,
5877
+ "learning_rate": 9.943621959612823e-06,
5878
+ "loss": 0.0417,
5879
+ "step": 7700
5880
+ },
5881
+ {
5882
+ "epoch": 7.225866916588566,
5883
+ "grad_norm": 0.9477363228797913,
5884
+ "learning_rate": 9.943548741378554e-06,
5885
+ "loss": 0.0305,
5886
+ "step": 7710
5887
+ },
5888
+ {
5889
+ "epoch": 7.235238987816308,
5890
+ "grad_norm": 1.0547223091125488,
5891
+ "learning_rate": 9.943475523144284e-06,
5892
+ "loss": 0.0314,
5893
+ "step": 7720
5894
+ },
5895
+ {
5896
+ "epoch": 7.244611059044049,
5897
+ "grad_norm": 1.4873117208480835,
5898
+ "learning_rate": 9.943402304910015e-06,
5899
+ "loss": 0.0302,
5900
+ "step": 7730
5901
+ },
5902
+ {
5903
+ "epoch": 7.25398313027179,
5904
+ "grad_norm": 0.9882778525352478,
5905
+ "learning_rate": 9.943329086675748e-06,
5906
+ "loss": 0.0328,
5907
+ "step": 7740
5908
+ },
5909
+ {
5910
+ "epoch": 7.2633552014995315,
5911
+ "grad_norm": 1.3187719583511353,
5912
+ "learning_rate": 9.943255868441477e-06,
5913
+ "loss": 0.0341,
5914
+ "step": 7750
5915
+ },
5916
+ {
5917
+ "epoch": 7.2633552014995315,
5918
+ "eval_loss": 0.03773624449968338,
5919
+ "eval_pearson_cosine": 0.7699387073516846,
5920
+ "eval_pearson_dot": 0.7237234115600586,
5921
+ "eval_pearson_euclidean": 0.7316513061523438,
5922
+ "eval_pearson_manhattan": 0.7335678339004517,
5923
+ "eval_runtime": 22.1612,
5924
+ "eval_samples_per_second": 67.686,
5925
+ "eval_spearman_cosine": 0.7694615753118931,
5926
+ "eval_spearman_dot": 0.7243788947148158,
5927
+ "eval_spearman_euclidean": 0.7361849268567764,
5928
+ "eval_spearman_manhattan": 0.7377945356892571,
5929
+ "eval_steps_per_second": 8.483,
5930
+ "step": 7750
5931
+ },
5932
+ {
5933
+ "epoch": 7.2727272727272725,
5934
+ "grad_norm": 1.0984870195388794,
5935
+ "learning_rate": 9.943182650207207e-06,
5936
+ "loss": 0.0329,
5937
+ "step": 7760
5938
+ },
5939
+ {
5940
+ "epoch": 7.282099343955014,
5941
+ "grad_norm": 0.7666100263595581,
5942
+ "learning_rate": 9.94310943197294e-06,
5943
+ "loss": 0.0358,
5944
+ "step": 7770
5945
+ },
5946
+ {
5947
+ "epoch": 7.291471415182755,
5948
+ "grad_norm": 0.9941838383674622,
5949
+ "learning_rate": 9.94303621373867e-06,
5950
+ "loss": 0.0351,
5951
+ "step": 7780
5952
+ },
5953
+ {
5954
+ "epoch": 7.300843486410496,
5955
+ "grad_norm": 1.3012335300445557,
5956
+ "learning_rate": 9.942962995504401e-06,
5957
+ "loss": 0.0296,
5958
+ "step": 7790
5959
+ },
5960
+ {
5961
+ "epoch": 7.310215557638238,
5962
+ "grad_norm": 1.1914719343185425,
5963
+ "learning_rate": 9.942889777270132e-06,
5964
+ "loss": 0.0333,
5965
+ "step": 7800
5966
+ },
5967
+ {
5968
+ "epoch": 7.319587628865979,
5969
+ "grad_norm": 1.1405929327011108,
5970
+ "learning_rate": 9.942816559035863e-06,
5971
+ "loss": 0.0408,
5972
+ "step": 7810
5973
+ },
5974
+ {
5975
+ "epoch": 7.328959700093721,
5976
+ "grad_norm": 0.665600061416626,
5977
+ "learning_rate": 9.942743340801594e-06,
5978
+ "loss": 0.0314,
5979
+ "step": 7820
5980
+ },
5981
+ {
5982
+ "epoch": 7.338331771321462,
5983
+ "grad_norm": 1.2029966115951538,
5984
+ "learning_rate": 9.942670122567324e-06,
5985
+ "loss": 0.041,
5986
+ "step": 7830
5987
+ },
5988
+ {
5989
+ "epoch": 7.347703842549203,
5990
+ "grad_norm": 0.44810751080513,
5991
+ "learning_rate": 9.942596904333057e-06,
5992
+ "loss": 0.0317,
5993
+ "step": 7840
5994
+ },
5995
+ {
5996
+ "epoch": 7.357075913776945,
5997
+ "grad_norm": 1.565082311630249,
5998
+ "learning_rate": 9.942523686098788e-06,
5999
+ "loss": 0.035,
6000
+ "step": 7850
6001
+ },
6002
+ {
6003
+ "epoch": 7.366447985004686,
6004
+ "grad_norm": 1.6850316524505615,
6005
+ "learning_rate": 9.942450467864517e-06,
6006
+ "loss": 0.0365,
6007
+ "step": 7860
6008
+ },
6009
+ {
6010
+ "epoch": 7.375820056232428,
6011
+ "grad_norm": 1.0027261972427368,
6012
+ "learning_rate": 9.942377249630249e-06,
6013
+ "loss": 0.0309,
6014
+ "step": 7870
6015
+ },
6016
+ {
6017
+ "epoch": 7.385192127460169,
6018
+ "grad_norm": 0.51674485206604,
6019
+ "learning_rate": 9.94230403139598e-06,
6020
+ "loss": 0.0321,
6021
+ "step": 7880
6022
+ },
6023
+ {
6024
+ "epoch": 7.39456419868791,
6025
+ "grad_norm": 1.0429599285125732,
6026
+ "learning_rate": 9.94223081316171e-06,
6027
+ "loss": 0.033,
6028
+ "step": 7890
6029
+ },
6030
+ {
6031
+ "epoch": 7.4039362699156515,
6032
+ "grad_norm": 0.618232250213623,
6033
+ "learning_rate": 9.942157594927441e-06,
6034
+ "loss": 0.0353,
6035
+ "step": 7900
6036
+ },
6037
+ {
6038
+ "epoch": 7.413308341143392,
6039
+ "grad_norm": 0.9780518412590027,
6040
+ "learning_rate": 9.942084376693174e-06,
6041
+ "loss": 0.0354,
6042
+ "step": 7910
6043
+ },
6044
+ {
6045
+ "epoch": 7.422680412371134,
6046
+ "grad_norm": 1.214362621307373,
6047
+ "learning_rate": 9.942011158458903e-06,
6048
+ "loss": 0.0338,
6049
+ "step": 7920
6050
+ },
6051
+ {
6052
+ "epoch": 7.432052483598875,
6053
+ "grad_norm": 1.202986240386963,
6054
+ "learning_rate": 9.941937940224634e-06,
6055
+ "loss": 0.0387,
6056
+ "step": 7930
6057
+ },
6058
+ {
6059
+ "epoch": 7.441424554826616,
6060
+ "grad_norm": 1.4128488302230835,
6061
+ "learning_rate": 9.941864721990366e-06,
6062
+ "loss": 0.0315,
6063
+ "step": 7940
6064
+ },
6065
+ {
6066
+ "epoch": 7.450796626054358,
6067
+ "grad_norm": 0.7198026180267334,
6068
+ "learning_rate": 9.941791503756097e-06,
6069
+ "loss": 0.0338,
6070
+ "step": 7950
6071
+ },
6072
+ {
6073
+ "epoch": 7.460168697282099,
6074
+ "grad_norm": 1.1124250888824463,
6075
+ "learning_rate": 9.941718285521828e-06,
6076
+ "loss": 0.0352,
6077
+ "step": 7960
6078
+ },
6079
+ {
6080
+ "epoch": 7.469540768509841,
6081
+ "grad_norm": 1.0420817136764526,
6082
+ "learning_rate": 9.941645067287558e-06,
6083
+ "loss": 0.0338,
6084
+ "step": 7970
6085
+ },
6086
+ {
6087
+ "epoch": 7.478912839737582,
6088
+ "grad_norm": 0.9638373255729675,
6089
+ "learning_rate": 9.941571849053289e-06,
6090
+ "loss": 0.0356,
6091
+ "step": 7980
6092
+ },
6093
+ {
6094
+ "epoch": 7.488284910965323,
6095
+ "grad_norm": 0.8584896922111511,
6096
+ "learning_rate": 9.94149863081902e-06,
6097
+ "loss": 0.0353,
6098
+ "step": 7990
6099
+ },
6100
+ {
6101
+ "epoch": 7.497656982193065,
6102
+ "grad_norm": 0.7161556482315063,
6103
+ "learning_rate": 9.94142541258475e-06,
6104
+ "loss": 0.0329,
6105
+ "step": 8000
6106
+ },
6107
+ {
6108
+ "epoch": 7.497656982193065,
6109
+ "eval_loss": 0.03753030672669411,
6110
+ "eval_pearson_cosine": 0.7705868482589722,
6111
+ "eval_pearson_dot": 0.7248358726501465,
6112
+ "eval_pearson_euclidean": 0.734631359577179,
6113
+ "eval_pearson_manhattan": 0.7363988161087036,
6114
+ "eval_runtime": 22.3628,
6115
+ "eval_samples_per_second": 67.076,
6116
+ "eval_spearman_cosine": 0.769708288306187,
6117
+ "eval_spearman_dot": 0.7249767839130733,
6118
+ "eval_spearman_euclidean": 0.7394619718544255,
6119
+ "eval_spearman_manhattan": 0.7409361299302836,
6120
+ "eval_steps_per_second": 8.407,
6121
+ "step": 8000
6122
  }
6123
  ],
6124
  "logging_steps": 10,