mtzig commited on
Commit
50d3933
·
verified ·
1 Parent(s): ed404a3

Training in progress, step 1180, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2283e0a60eecf0e26db089b1ea106db4a8943588d7c5d1ae539887caf2a0623
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ee0fa3cfc5342a2048887d02427e60ff0afc569e7e72f28871b43609a7a5e1
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:039e2daa021420447c63254cecab80fa9d8c4955bb82c268ed4412f9d7cc457e
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f3ceb0fb772fe24eaea01b326a2b2121847999c2f7a22802c742da1249ba05
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bb386b06abed058c971637632b15f30bf72e3f0fed658811dd9d1879b2cc249
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456d2a34230921bdb7b561893b5af0cb6d8091afa425c17ee126aa146288889e
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b690d2a646fe4b43296ed7a11b9ed92846b14116b36ab4f256a0c0a38193fec5
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28617eadf43410538e6a163b4682a3582927554dfc2ebcfeda5f2302a4f4912b
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40e60cce6b988d761a7cb0a778490b5bef357343f215f4368e74b271a3544add
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b2105cd0327f43c5634a1c90bb691d1139c485a9227b49d51855ad6d5171c7
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fbc16fe00aa2316ff9c35dc5c163a58bd61d544a0b373903f53b1dd607744f0
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7dbf277a9ad869e3bc7ce92d0b6beb5dc907e7e8e3e8828157b5e8c8b2c1b41
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c26b5d36343cbeb9400248846e9572b9226655bba020bfffb57eb55ac503eaa0
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f74810044cc457671581eba123717edb24dcb5d167ecc94676bd0e932fe808
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80e37075e6a4c78861839fc539511c538b95c0905a026bfa2e725cda89b0c3ea
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc7e6b1dcce64027f24e8b2d594477e039fa19287eca4818051ab9d42c792951
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47c3058111d1013a8c7bee77444904a0475c2f0bffb7d0c4d6c87291a641236d
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e46a58a2528053869840946c7933b4f9bc1822ca9c3fb63b1ff28c148d5b1b29
3
  size 15088
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ad5b62f0cc16eba86ed43478cd91d2660dc90a4ec6abf69d8eba3dc9fd166bb
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6a28d5179eb5d23e5f03c55f87dfd35453c901e36b529451aecad818e40b0b
3
  size 15088
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b7a7201b10eea93ada67755ff9fd428efa7903fb7a7749a862485dd229f2c71
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec18cd050d35eff43c748243e23cab1004b447bcd8e50aa6f3860476da3cb24
3
  size 15088
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc03cdb9d7cd36d375ad07cb7f8faa863f3ab0b15c5d442b25b00ccb4f627e1
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34d8cb5b591066154ac37d679613e29503a64988ff370244e9998659dec40ef6
3
  size 15088
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b064707c1d587f8d90b9fc42a28979d7e7f25a60fc5fe43535927f2c04c08c63
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8568dd25b7f4b2267897ddbe3a35792475b47cddd04bad6ae5cd4aa526e892ed
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9322033898305084,
5
  "eval_steps": 20,
6
- "global_step": 1100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8379,6 +8379,614 @@
8379
  "eval_samples_per_second": 5.654,
8380
  "eval_steps_per_second": 0.194,
8381
  "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8382
  }
8383
  ],
8384
  "logging_steps": 1,
@@ -8393,12 +9001,12 @@
8393
  "should_evaluate": false,
8394
  "should_log": false,
8395
  "should_save": true,
8396
- "should_training_stop": false
8397
  },
8398
  "attributes": {}
8399
  }
8400
  },
8401
- "total_flos": 3.384126928028959e+17,
8402
  "train_batch_size": 8,
8403
  "trial_name": null,
8404
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 20,
6
+ "global_step": 1180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8379
  "eval_samples_per_second": 5.654,
8380
  "eval_steps_per_second": 0.194,
8381
  "step": 1100
8382
+ },
8383
+ {
8384
+ "epoch": 0.9330508474576271,
8385
+ "grad_norm": 2.0700523853302,
8386
+ "learning_rate": 2.7183003920826846e-07,
8387
+ "loss": 0.015,
8388
+ "step": 1101
8389
+ },
8390
+ {
8391
+ "epoch": 0.9338983050847458,
8392
+ "grad_norm": 0.7995718717575073,
8393
+ "learning_rate": 2.6502218965613335e-07,
8394
+ "loss": 0.0059,
8395
+ "step": 1102
8396
+ },
8397
+ {
8398
+ "epoch": 0.9347457627118644,
8399
+ "grad_norm": 1.953245759010315,
8400
+ "learning_rate": 2.5829952946509585e-07,
8401
+ "loss": 0.0084,
8402
+ "step": 1103
8403
+ },
8404
+ {
8405
+ "epoch": 0.9355932203389831,
8406
+ "grad_norm": 1.5361946821212769,
8407
+ "learning_rate": 2.5166211746416534e-07,
8408
+ "loss": 0.0078,
8409
+ "step": 1104
8410
+ },
8411
+ {
8412
+ "epoch": 0.9364406779661016,
8413
+ "grad_norm": 1.721569299697876,
8414
+ "learning_rate": 2.451100117363603e-07,
8415
+ "loss": 0.0134,
8416
+ "step": 1105
8417
+ },
8418
+ {
8419
+ "epoch": 0.9372881355932203,
8420
+ "grad_norm": 0.24595603346824646,
8421
+ "learning_rate": 2.386432696181973e-07,
8422
+ "loss": 0.0011,
8423
+ "step": 1106
8424
+ },
8425
+ {
8426
+ "epoch": 0.938135593220339,
8427
+ "grad_norm": 0.49296998977661133,
8428
+ "learning_rate": 2.3226194769918497e-07,
8429
+ "loss": 0.0035,
8430
+ "step": 1107
8431
+ },
8432
+ {
8433
+ "epoch": 0.9389830508474576,
8434
+ "grad_norm": 0.35648539662361145,
8435
+ "learning_rate": 2.2596610182133328e-07,
8436
+ "loss": 0.0014,
8437
+ "step": 1108
8438
+ },
8439
+ {
8440
+ "epoch": 0.9398305084745763,
8441
+ "grad_norm": 0.42918694019317627,
8442
+ "learning_rate": 2.1975578707866818e-07,
8443
+ "loss": 0.0037,
8444
+ "step": 1109
8445
+ },
8446
+ {
8447
+ "epoch": 0.940677966101695,
8448
+ "grad_norm": 0.3251606822013855,
8449
+ "learning_rate": 2.1363105781673888e-07,
8450
+ "loss": 0.0015,
8451
+ "step": 1110
8452
+ },
8453
+ {
8454
+ "epoch": 0.9415254237288135,
8455
+ "grad_norm": 1.1148656606674194,
8456
+ "learning_rate": 2.0759196763215362e-07,
8457
+ "loss": 0.0024,
8458
+ "step": 1111
8459
+ },
8460
+ {
8461
+ "epoch": 0.9423728813559322,
8462
+ "grad_norm": 1.0703065395355225,
8463
+ "learning_rate": 2.0163856937210236e-07,
8464
+ "loss": 0.0057,
8465
+ "step": 1112
8466
+ },
8467
+ {
8468
+ "epoch": 0.9432203389830508,
8469
+ "grad_norm": 0.5219106078147888,
8470
+ "learning_rate": 1.9577091513389933e-07,
8471
+ "loss": 0.0019,
8472
+ "step": 1113
8473
+ },
8474
+ {
8475
+ "epoch": 0.9440677966101695,
8476
+ "grad_norm": 1.2193870544433594,
8477
+ "learning_rate": 1.8998905626452568e-07,
8478
+ "loss": 0.0039,
8479
+ "step": 1114
8480
+ },
8481
+ {
8482
+ "epoch": 0.9449152542372882,
8483
+ "grad_norm": 0.7670567035675049,
8484
+ "learning_rate": 1.842930433601775e-07,
8485
+ "loss": 0.0023,
8486
+ "step": 1115
8487
+ },
8488
+ {
8489
+ "epoch": 0.9457627118644067,
8490
+ "grad_norm": 1.6820135116577148,
8491
+ "learning_rate": 1.7868292626582851e-07,
8492
+ "loss": 0.0066,
8493
+ "step": 1116
8494
+ },
8495
+ {
8496
+ "epoch": 0.9466101694915254,
8497
+ "grad_norm": 0.2756711542606354,
8498
+ "learning_rate": 1.731587540747903e-07,
8499
+ "loss": 0.0018,
8500
+ "step": 1117
8501
+ },
8502
+ {
8503
+ "epoch": 0.9474576271186441,
8504
+ "grad_norm": 0.09309152513742447,
8505
+ "learning_rate": 1.6772057512828178e-07,
8506
+ "loss": 0.0005,
8507
+ "step": 1118
8508
+ },
8509
+ {
8510
+ "epoch": 0.9483050847457627,
8511
+ "grad_norm": 0.9462043642997742,
8512
+ "learning_rate": 1.6236843701500692e-07,
8513
+ "loss": 0.0045,
8514
+ "step": 1119
8515
+ },
8516
+ {
8517
+ "epoch": 0.9491525423728814,
8518
+ "grad_norm": 0.3377615213394165,
8519
+ "learning_rate": 1.5710238657074218e-07,
8520
+ "loss": 0.0015,
8521
+ "step": 1120
8522
+ },
8523
+ {
8524
+ "epoch": 0.9491525423728814,
8525
+ "eval_accuracy": 1.0,
8526
+ "eval_f1": 1.0,
8527
+ "eval_loss": 0.00010440533515065908,
8528
+ "eval_precision": 1.0,
8529
+ "eval_recall": 1.0,
8530
+ "eval_runtime": 50.0065,
8531
+ "eval_samples_per_second": 5.839,
8532
+ "eval_steps_per_second": 0.2,
8533
+ "step": 1120
8534
+ },
8535
+ {
8536
+ "epoch": 0.95,
8537
+ "grad_norm": 1.3986833095550537,
8538
+ "learning_rate": 1.519224698779198e-07,
8539
+ "loss": 0.0143,
8540
+ "step": 1121
8541
+ },
8542
+ {
8543
+ "epoch": 0.9508474576271186,
8544
+ "grad_norm": 2.198768377304077,
8545
+ "learning_rate": 1.4682873226523064e-07,
8546
+ "loss": 0.0134,
8547
+ "step": 1122
8548
+ },
8549
+ {
8550
+ "epoch": 0.9516949152542373,
8551
+ "grad_norm": 3.4906070232391357,
8552
+ "learning_rate": 1.418212183072254e-07,
8553
+ "loss": 0.023,
8554
+ "step": 1123
8555
+ },
8556
+ {
8557
+ "epoch": 0.9525423728813559,
8558
+ "grad_norm": 2.3907110691070557,
8559
+ "learning_rate": 1.3689997182392278e-07,
8560
+ "loss": 0.0341,
8561
+ "step": 1124
8562
+ },
8563
+ {
8564
+ "epoch": 0.9533898305084746,
8565
+ "grad_norm": 1.1482832431793213,
8566
+ "learning_rate": 1.3206503588042762e-07,
8567
+ "loss": 0.0046,
8568
+ "step": 1125
8569
+ },
8570
+ {
8571
+ "epoch": 0.9542372881355933,
8572
+ "grad_norm": 0.41303661465644836,
8573
+ "learning_rate": 1.2731645278655448e-07,
8574
+ "loss": 0.0015,
8575
+ "step": 1126
8576
+ },
8577
+ {
8578
+ "epoch": 0.9550847457627119,
8579
+ "grad_norm": 2.1115567684173584,
8580
+ "learning_rate": 1.2265426409645676e-07,
8581
+ "loss": 0.0193,
8582
+ "step": 1127
8583
+ },
8584
+ {
8585
+ "epoch": 0.9559322033898305,
8586
+ "grad_norm": 0.9733043909072876,
8587
+ "learning_rate": 1.180785106082627e-07,
8588
+ "loss": 0.0063,
8589
+ "step": 1128
8590
+ },
8591
+ {
8592
+ "epoch": 0.9567796610169491,
8593
+ "grad_norm": 1.6615018844604492,
8594
+ "learning_rate": 1.1358923236371888e-07,
8595
+ "loss": 0.0143,
8596
+ "step": 1129
8597
+ },
8598
+ {
8599
+ "epoch": 0.9576271186440678,
8600
+ "grad_norm": 2.9333720207214355,
8601
+ "learning_rate": 1.0918646864784166e-07,
8602
+ "loss": 0.0204,
8603
+ "step": 1130
8604
+ },
8605
+ {
8606
+ "epoch": 0.9584745762711865,
8607
+ "grad_norm": 0.4009217619895935,
8608
+ "learning_rate": 1.0487025798856632e-07,
8609
+ "loss": 0.0019,
8610
+ "step": 1131
8611
+ },
8612
+ {
8613
+ "epoch": 0.9593220338983051,
8614
+ "grad_norm": 0.5355218052864075,
8615
+ "learning_rate": 1.0064063815642178e-07,
8616
+ "loss": 0.0024,
8617
+ "step": 1132
8618
+ },
8619
+ {
8620
+ "epoch": 0.9601694915254237,
8621
+ "grad_norm": 1.6377936601638794,
8622
+ "learning_rate": 9.649764616418866e-08,
8623
+ "loss": 0.018,
8624
+ "step": 1133
8625
+ },
8626
+ {
8627
+ "epoch": 0.9610169491525423,
8628
+ "grad_norm": 1.1204107999801636,
8629
+ "learning_rate": 9.244131826658065e-08,
8630
+ "loss": 0.0128,
8631
+ "step": 1134
8632
+ },
8633
+ {
8634
+ "epoch": 0.961864406779661,
8635
+ "grad_norm": 1.4632524251937866,
8636
+ "learning_rate": 8.847168995992916e-08,
8637
+ "loss": 0.0098,
8638
+ "step": 1135
8639
+ },
8640
+ {
8641
+ "epoch": 0.9627118644067797,
8642
+ "grad_norm": 0.7185283303260803,
8643
+ "learning_rate": 8.458879598186586e-08,
8644
+ "loss": 0.0023,
8645
+ "step": 1136
8646
+ },
8647
+ {
8648
+ "epoch": 0.9635593220338983,
8649
+ "grad_norm": 2.070397138595581,
8650
+ "learning_rate": 8.079267031102844e-08,
8651
+ "loss": 0.014,
8652
+ "step": 1137
8653
+ },
8654
+ {
8655
+ "epoch": 0.964406779661017,
8656
+ "grad_norm": 2.4648094177246094,
8657
+ "learning_rate": 7.708334616675417e-08,
8658
+ "loss": 0.0103,
8659
+ "step": 1138
8660
+ },
8661
+ {
8662
+ "epoch": 0.9652542372881356,
8663
+ "grad_norm": 1.3357059955596924,
8664
+ "learning_rate": 7.34608560087946e-08,
8665
+ "loss": 0.0114,
8666
+ "step": 1139
8667
+ },
8668
+ {
8669
+ "epoch": 0.9661016949152542,
8670
+ "grad_norm": 0.8426046371459961,
8671
+ "learning_rate": 6.99252315370269e-08,
8672
+ "loss": 0.0059,
8673
+ "step": 1140
8674
+ },
8675
+ {
8676
+ "epoch": 0.9661016949152542,
8677
+ "eval_accuracy": 1.0,
8678
+ "eval_f1": 1.0,
8679
+ "eval_loss": 0.00011169948993483558,
8680
+ "eval_precision": 1.0,
8681
+ "eval_recall": 1.0,
8682
+ "eval_runtime": 50.2826,
8683
+ "eval_samples_per_second": 5.807,
8684
+ "eval_steps_per_second": 0.199,
8685
+ "step": 1140
8686
+ },
8687
+ {
8688
+ "epoch": 0.9669491525423729,
8689
+ "grad_norm": 0.7774562835693359,
8690
+ "learning_rate": 6.647650369118408e-08,
8691
+ "loss": 0.0078,
8692
+ "step": 1141
8693
+ },
8694
+ {
8695
+ "epoch": 0.9677966101694915,
8696
+ "grad_norm": 2.7641003131866455,
8697
+ "learning_rate": 6.311470265057518e-08,
8698
+ "loss": 0.0138,
8699
+ "step": 1142
8700
+ },
8701
+ {
8702
+ "epoch": 0.9686440677966102,
8703
+ "grad_norm": 2.618208885192871,
8704
+ "learning_rate": 5.983985783382773e-08,
8705
+ "loss": 0.0202,
8706
+ "step": 1143
8707
+ },
8708
+ {
8709
+ "epoch": 0.9694915254237289,
8710
+ "grad_norm": 1.5658472776412964,
8711
+ "learning_rate": 5.665199789862907e-08,
8712
+ "loss": 0.0201,
8713
+ "step": 1144
8714
+ },
8715
+ {
8716
+ "epoch": 0.9703389830508474,
8717
+ "grad_norm": 0.43462297320365906,
8718
+ "learning_rate": 5.3551150741472104e-08,
8719
+ "loss": 0.002,
8720
+ "step": 1145
8721
+ },
8722
+ {
8723
+ "epoch": 0.9711864406779661,
8724
+ "grad_norm": 1.4689350128173828,
8725
+ "learning_rate": 5.0537343497414346e-08,
8726
+ "loss": 0.009,
8727
+ "step": 1146
8728
+ },
8729
+ {
8730
+ "epoch": 0.9720338983050848,
8731
+ "grad_norm": 2.49544620513916,
8732
+ "learning_rate": 4.761060253984151e-08,
8733
+ "loss": 0.0279,
8734
+ "step": 1147
8735
+ },
8736
+ {
8737
+ "epoch": 0.9728813559322034,
8738
+ "grad_norm": 1.6884862184524536,
8739
+ "learning_rate": 4.47709534802343e-08,
8740
+ "loss": 0.0078,
8741
+ "step": 1148
8742
+ },
8743
+ {
8744
+ "epoch": 0.9737288135593221,
8745
+ "grad_norm": 0.37377646565437317,
8746
+ "learning_rate": 4.201842116794308e-08,
8747
+ "loss": 0.0023,
8748
+ "step": 1149
8749
+ },
8750
+ {
8751
+ "epoch": 0.9745762711864406,
8752
+ "grad_norm": 0.7765163779258728,
8753
+ "learning_rate": 3.9353029689974676e-08,
8754
+ "loss": 0.0038,
8755
+ "step": 1150
8756
+ },
8757
+ {
8758
+ "epoch": 0.9754237288135593,
8759
+ "grad_norm": 0.513041615486145,
8760
+ "learning_rate": 3.677480237077813e-08,
8761
+ "loss": 0.0016,
8762
+ "step": 1151
8763
+ },
8764
+ {
8765
+ "epoch": 0.976271186440678,
8766
+ "grad_norm": 0.7876710891723633,
8767
+ "learning_rate": 3.4283761772042623e-08,
8768
+ "loss": 0.0056,
8769
+ "step": 1152
8770
+ },
8771
+ {
8772
+ "epoch": 0.9771186440677966,
8773
+ "grad_norm": 0.6897133588790894,
8774
+ "learning_rate": 3.187992969249876e-08,
8775
+ "loss": 0.0034,
8776
+ "step": 1153
8777
+ },
8778
+ {
8779
+ "epoch": 0.9779661016949153,
8780
+ "grad_norm": 1.7506352663040161,
8781
+ "learning_rate": 2.9563327167727585e-08,
8782
+ "loss": 0.0099,
8783
+ "step": 1154
8784
+ },
8785
+ {
8786
+ "epoch": 0.9788135593220338,
8787
+ "grad_norm": 2.4349284172058105,
8788
+ "learning_rate": 2.733397446997632e-08,
8789
+ "loss": 0.0097,
8790
+ "step": 1155
8791
+ },
8792
+ {
8793
+ "epoch": 0.9796610169491525,
8794
+ "grad_norm": 1.168368935585022,
8795
+ "learning_rate": 2.5191891107985143e-08,
8796
+ "loss": 0.0082,
8797
+ "step": 1156
8798
+ },
8799
+ {
8800
+ "epoch": 0.9805084745762712,
8801
+ "grad_norm": 1.3423504829406738,
8802
+ "learning_rate": 2.3137095826809564e-08,
8803
+ "loss": 0.0091,
8804
+ "step": 1157
8805
+ },
8806
+ {
8807
+ "epoch": 0.9813559322033898,
8808
+ "grad_norm": 0.42089366912841797,
8809
+ "learning_rate": 2.1169606607662764e-08,
8810
+ "loss": 0.0024,
8811
+ "step": 1158
8812
+ },
8813
+ {
8814
+ "epoch": 0.9822033898305085,
8815
+ "grad_norm": 1.1868031024932861,
8816
+ "learning_rate": 1.92894406677524e-08,
8817
+ "loss": 0.0064,
8818
+ "step": 1159
8819
+ },
8820
+ {
8821
+ "epoch": 0.9830508474576272,
8822
+ "grad_norm": 1.4910340309143066,
8823
+ "learning_rate": 1.7496614460135174e-08,
8824
+ "loss": 0.0069,
8825
+ "step": 1160
8826
+ },
8827
+ {
8828
+ "epoch": 0.9830508474576272,
8829
+ "eval_accuracy": 1.0,
8830
+ "eval_f1": 1.0,
8831
+ "eval_loss": 0.0001118536019930616,
8832
+ "eval_precision": 1.0,
8833
+ "eval_recall": 1.0,
8834
+ "eval_runtime": 50.1342,
8835
+ "eval_samples_per_second": 5.824,
8836
+ "eval_steps_per_second": 0.199,
8837
+ "step": 1160
8838
+ },
8839
+ {
8840
+ "epoch": 0.9838983050847457,
8841
+ "grad_norm": 1.986571192741394,
8842
+ "learning_rate": 1.5791143673570263e-08,
8843
+ "loss": 0.0193,
8844
+ "step": 1161
8845
+ },
8846
+ {
8847
+ "epoch": 0.9847457627118644,
8848
+ "grad_norm": 2.655247688293457,
8849
+ "learning_rate": 1.4173043232380557e-08,
8850
+ "loss": 0.0257,
8851
+ "step": 1162
8852
+ },
8853
+ {
8854
+ "epoch": 0.985593220338983,
8855
+ "grad_norm": 1.0046510696411133,
8856
+ "learning_rate": 1.2642327296327194e-08,
8857
+ "loss": 0.003,
8858
+ "step": 1163
8859
+ },
8860
+ {
8861
+ "epoch": 0.9864406779661017,
8862
+ "grad_norm": 1.124706745147705,
8863
+ "learning_rate": 1.1199009260479676e-08,
8864
+ "loss": 0.0157,
8865
+ "step": 1164
8866
+ },
8867
+ {
8868
+ "epoch": 0.9872881355932204,
8869
+ "grad_norm": 1.0634783506393433,
8870
+ "learning_rate": 9.843101755101503e-09,
8871
+ "loss": 0.0046,
8872
+ "step": 1165
8873
+ },
8874
+ {
8875
+ "epoch": 0.988135593220339,
8876
+ "grad_norm": 1.6343358755111694,
8877
+ "learning_rate": 8.574616645541377e-09,
8878
+ "loss": 0.0113,
8879
+ "step": 1166
8880
+ },
8881
+ {
8882
+ "epoch": 0.9889830508474576,
8883
+ "grad_norm": 0.2569604814052582,
8884
+ "learning_rate": 7.3935650321255156e-09,
8885
+ "loss": 0.0015,
8886
+ "step": 1167
8887
+ },
8888
+ {
8889
+ "epoch": 0.9898305084745763,
8890
+ "grad_norm": 0.6046754717826843,
8891
+ "learning_rate": 6.299957250064381e-09,
8892
+ "loss": 0.0024,
8893
+ "step": 1168
8894
+ },
8895
+ {
8896
+ "epoch": 0.9906779661016949,
8897
+ "grad_norm": 0.760793149471283,
8898
+ "learning_rate": 5.293802869357212e-09,
8899
+ "loss": 0.0026,
8900
+ "step": 1169
8901
+ },
8902
+ {
8903
+ "epoch": 0.9915254237288136,
8904
+ "grad_norm": 1.5352802276611328,
8905
+ "learning_rate": 4.375110694713192e-09,
8906
+ "loss": 0.0089,
8907
+ "step": 1170
8908
+ },
8909
+ {
8910
+ "epoch": 0.9923728813559322,
8911
+ "grad_norm": 1.066978931427002,
8912
+ "learning_rate": 3.5438887654737355e-09,
8913
+ "loss": 0.0054,
8914
+ "step": 1171
8915
+ },
8916
+ {
8917
+ "epoch": 0.9932203389830508,
8918
+ "grad_norm": 0.2690616846084595,
8919
+ "learning_rate": 2.800144355540324e-09,
8920
+ "loss": 0.0012,
8921
+ "step": 1172
8922
+ },
8923
+ {
8924
+ "epoch": 0.9940677966101695,
8925
+ "grad_norm": 0.24727602303028107,
8926
+ "learning_rate": 2.1438839733101125e-09,
8927
+ "loss": 0.0012,
8928
+ "step": 1173
8929
+ },
8930
+ {
8931
+ "epoch": 0.9949152542372881,
8932
+ "grad_norm": 0.8187950849533081,
8933
+ "learning_rate": 1.5751133616215276e-09,
8934
+ "loss": 0.0028,
8935
+ "step": 1174
8936
+ },
8937
+ {
8938
+ "epoch": 0.9957627118644068,
8939
+ "grad_norm": 5.176650047302246,
8940
+ "learning_rate": 1.0938374977020882e-09,
8941
+ "loss": 0.0332,
8942
+ "step": 1175
8943
+ },
8944
+ {
8945
+ "epoch": 0.9966101694915255,
8946
+ "grad_norm": 1.1245023012161255,
8947
+ "learning_rate": 7.000605931273274e-10,
8948
+ "loss": 0.0031,
8949
+ "step": 1176
8950
+ },
8951
+ {
8952
+ "epoch": 0.997457627118644,
8953
+ "grad_norm": 0.7511110901832581,
8954
+ "learning_rate": 3.9378609377971335e-10,
8955
+ "loss": 0.0033,
8956
+ "step": 1177
8957
+ },
8958
+ {
8959
+ "epoch": 0.9983050847457627,
8960
+ "grad_norm": 0.6101163029670715,
8961
+ "learning_rate": 1.7501667982311454e-10,
8962
+ "loss": 0.0029,
8963
+ "step": 1178
8964
+ },
8965
+ {
8966
+ "epoch": 0.9991525423728813,
8967
+ "grad_norm": 1.6641690731048584,
8968
+ "learning_rate": 4.375426567726493e-11,
8969
+ "loss": 0.0074,
8970
+ "step": 1179
8971
+ },
8972
+ {
8973
+ "epoch": 1.0,
8974
+ "grad_norm": 0.8142516016960144,
8975
+ "learning_rate": 0.0,
8976
+ "loss": 0.0058,
8977
+ "step": 1180
8978
+ },
8979
+ {
8980
+ "epoch": 1.0,
8981
+ "eval_accuracy": 1.0,
8982
+ "eval_f1": 1.0,
8983
+ "eval_loss": 0.0001048951453412883,
8984
+ "eval_precision": 1.0,
8985
+ "eval_recall": 1.0,
8986
+ "eval_runtime": 49.7666,
8987
+ "eval_samples_per_second": 5.867,
8988
+ "eval_steps_per_second": 0.201,
8989
+ "step": 1180
8990
  }
8991
  ],
8992
  "logging_steps": 1,
 
9001
  "should_evaluate": false,
9002
  "should_log": false,
9003
  "should_save": true,
9004
+ "should_training_stop": true
9005
  },
9006
  "attributes": {}
9007
  }
9008
  },
9009
+ "total_flos": 3.6331250614114714e+17,
9010
  "train_batch_size": 8,
9011
  "trial_name": null,
9012
  "trial_params": null