mtzig commited on
Commit
b8bed13
·
verified ·
1 Parent(s): c41fac2

Training in progress, step 776, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8a9b5df6415cf6aaf2fbd963784ca2798906e60cec2a4d96cedf776b0170bd5
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9812233963d91f939967280f1fb8dadd8814b762a867032b3f1837b1d53052
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a477ffe7b31b769212a07b084cb1d6fd6244ebaf19a4d7d0e204d6e487b1aa1c
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c88e2b1f68ece33ec06a916e5bb00b720c1f8a904961de7eb56d64a799fab9f
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67bf1d290ebbe8581c52d47ca6e510d907a2305a5eebfef589fed6462f44754f
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d35b958384213145c1212e288e3c8343f840167644866a5b9157967625e8fb
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:863d7a925082ef890f7e9f5fe37f69a475fb3c8a749f7db0ba29e80f4b2757d1
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1899dca709a2035b39b51d9a6ee7e6219b311be55baca81d0f0aea25b360adcf
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44f0fee06777bc54d20e8cfadf02e1fcf062878ec77c27273222f443590d3ca7
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee5f9be012755d9da3db6b768bd75a664482268608ec40525ff012db7c716fa
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6b4dadb656bd2f818f65eedb04bde289d144e274b64e185d6aa6038f82edc5b
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4101704947e1bc12f0b2d183919dbd5c6423744d74a9e9bced6f9f568c741cdd
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6714a698672b2edbe18148b125be564081dbe0c32820c4d2862c9ebbfc701d39
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69971347771fed00d279e6b4731b6a18fff227ba1765979d94a1881fd74613ed
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9f0d3ac60ec2c909466305ff9424e3153bb7f95f5b685220572eb0bd2fe45e0
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23d8a7e8405e5f6c5c83bae18e57f014dc7bbbf960c7bbe1c96315c1f7b18576
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6a53af4113d719a74be72b9b96e06ddfcf589f1c186482a58b06d91bc3e4d34
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecbf30739d47702ff1dc62fda61a8b804166ddf0362f853a15764a0f6ca810ca
3
  size 15088
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3f79573df50dcf97b1bf70e5483cf9597da1b8a05214eb274fc3c3833cf3ad5
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ea2caeae1f19550ff4c866c756827368743f8de236623a1b0db1ace7686574
3
  size 15088
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f80c1cfb5ea86276cd48dd4a859d36d824a2fd5563d437a8a0c4be3eb6200b32
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1c459f1b116355fb0f3bdc3f65b9adc53657df08c616f375c6b5ee9a3f2a5eb
3
  size 15088
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:714110b482b507e1fb6219bf62e8ec1001b632b03d77db23a105560f7b48af04
3
  size 15088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e129f010772342f4e564c483d7381dc6b23561fca445914ea8c159bc5d7a2ed
3
  size 15088
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b38c3c47d3e1cfb698a7e53778b8d216b765546387a9e6bd3f149bea8249062
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836d724299dd150551400174e98221fb5b90ef0e07f83661da8beb5333f31043
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9020618556701031,
5
  "eval_steps": 20,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5339,6 +5339,574 @@
5339
  "eval_samples_per_second": 5.436,
5340
  "eval_steps_per_second": 0.179,
5341
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5342
  }
5343
  ],
5344
  "logging_steps": 1,
@@ -5353,12 +5921,12 @@
5353
  "should_evaluate": false,
5354
  "should_log": false,
5355
  "should_save": true,
5356
- "should_training_stop": false
5357
  },
5358
  "attributes": {}
5359
  }
5360
  },
5361
- "total_flos": 2.3348483995572634e+17,
5362
  "train_batch_size": 8,
5363
  "trial_name": null,
5364
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 20,
6
+ "global_step": 776,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5339
  "eval_samples_per_second": 5.436,
5340
  "eval_steps_per_second": 0.179,
5341
  "step": 700
5342
+ },
5343
+ {
5344
+ "epoch": 0.9033505154639175,
5345
+ "grad_norm": 4.556031703948975,
5346
+ "learning_rate": 5.643565211494285e-07,
5347
+ "loss": 0.041,
5348
+ "step": 701
5349
+ },
5350
+ {
5351
+ "epoch": 0.904639175257732,
5352
+ "grad_norm": 0.027816738933324814,
5353
+ "learning_rate": 5.495458148283505e-07,
5354
+ "loss": 0.0002,
5355
+ "step": 702
5356
+ },
5357
+ {
5358
+ "epoch": 0.9059278350515464,
5359
+ "grad_norm": 0.04451802000403404,
5360
+ "learning_rate": 5.349265521053603e-07,
5361
+ "loss": 0.0002,
5362
+ "step": 703
5363
+ },
5364
+ {
5365
+ "epoch": 0.9072164948453608,
5366
+ "grad_norm": 1.494473934173584,
5367
+ "learning_rate": 5.204990291317535e-07,
5368
+ "loss": 0.003,
5369
+ "step": 704
5370
+ },
5371
+ {
5372
+ "epoch": 0.9085051546391752,
5373
+ "grad_norm": 2.967031955718994,
5374
+ "learning_rate": 5.062635381746362e-07,
5375
+ "loss": 0.0254,
5376
+ "step": 705
5377
+ },
5378
+ {
5379
+ "epoch": 0.9097938144329897,
5380
+ "grad_norm": 0.6380704045295715,
5381
+ "learning_rate": 4.92220367611006e-07,
5382
+ "loss": 0.0036,
5383
+ "step": 706
5384
+ },
5385
+ {
5386
+ "epoch": 0.9110824742268041,
5387
+ "grad_norm": 0.40568777918815613,
5388
+ "learning_rate": 4.783698019219118e-07,
5389
+ "loss": 0.0014,
5390
+ "step": 707
5391
+ },
5392
+ {
5393
+ "epoch": 0.9123711340206185,
5394
+ "grad_norm": 3.13850736618042,
5395
+ "learning_rate": 4.647121216866857e-07,
5396
+ "loss": 0.0343,
5397
+ "step": 708
5398
+ },
5399
+ {
5400
+ "epoch": 0.913659793814433,
5401
+ "grad_norm": 1.518782377243042,
5402
+ "learning_rate": 4.512476035772628e-07,
5403
+ "loss": 0.0098,
5404
+ "step": 709
5405
+ },
5406
+ {
5407
+ "epoch": 0.9149484536082474,
5408
+ "grad_norm": 0.19677811861038208,
5409
+ "learning_rate": 4.3797652035257544e-07,
5410
+ "loss": 0.0016,
5411
+ "step": 710
5412
+ },
5413
+ {
5414
+ "epoch": 0.9162371134020618,
5415
+ "grad_norm": 1.9592320919036865,
5416
+ "learning_rate": 4.248991408530279e-07,
5417
+ "loss": 0.0126,
5418
+ "step": 711
5419
+ },
5420
+ {
5421
+ "epoch": 0.9175257731958762,
5422
+ "grad_norm": 0.1629909873008728,
5423
+ "learning_rate": 4.1201572999505e-07,
5424
+ "loss": 0.0004,
5425
+ "step": 712
5426
+ },
5427
+ {
5428
+ "epoch": 0.9188144329896907,
5429
+ "grad_norm": 5.022075653076172,
5430
+ "learning_rate": 3.9932654876573164e-07,
5431
+ "loss": 0.0158,
5432
+ "step": 713
5433
+ },
5434
+ {
5435
+ "epoch": 0.9201030927835051,
5436
+ "grad_norm": 0.3693143427371979,
5437
+ "learning_rate": 3.8683185421753313e-07,
5438
+ "loss": 0.0014,
5439
+ "step": 714
5440
+ },
5441
+ {
5442
+ "epoch": 0.9213917525773195,
5443
+ "grad_norm": 1.231784462928772,
5444
+ "learning_rate": 3.74531899463082e-07,
5445
+ "loss": 0.0082,
5446
+ "step": 715
5447
+ },
5448
+ {
5449
+ "epoch": 0.9226804123711341,
5450
+ "grad_norm": 0.04480728134512901,
5451
+ "learning_rate": 3.6242693367004365e-07,
5452
+ "loss": 0.0002,
5453
+ "step": 716
5454
+ },
5455
+ {
5456
+ "epoch": 0.9239690721649485,
5457
+ "grad_norm": 0.14905039966106415,
5458
+ "learning_rate": 3.5051720205606877e-07,
5459
+ "loss": 0.0009,
5460
+ "step": 717
5461
+ },
5462
+ {
5463
+ "epoch": 0.9252577319587629,
5464
+ "grad_norm": 0.11093373596668243,
5465
+ "learning_rate": 3.38802945883836e-07,
5466
+ "loss": 0.0008,
5467
+ "step": 718
5468
+ },
5469
+ {
5470
+ "epoch": 0.9265463917525774,
5471
+ "grad_norm": 1.7350820302963257,
5472
+ "learning_rate": 3.2728440245615724e-07,
5473
+ "loss": 0.0041,
5474
+ "step": 719
5475
+ },
5476
+ {
5477
+ "epoch": 0.9278350515463918,
5478
+ "grad_norm": 4.958645343780518,
5479
+ "learning_rate": 3.1596180511117235e-07,
5480
+ "loss": 0.0122,
5481
+ "step": 720
5482
+ },
5483
+ {
5484
+ "epoch": 0.9278350515463918,
5485
+ "eval_accuracy": 0.9980139026812314,
5486
+ "eval_f1": 0.9642857142857143,
5487
+ "eval_loss": 0.01316594984382391,
5488
+ "eval_precision": 0.9818181818181818,
5489
+ "eval_recall": 0.9473684210526315,
5490
+ "eval_runtime": 85.875,
5491
+ "eval_samples_per_second": 5.298,
5492
+ "eval_steps_per_second": 0.175,
5493
+ "step": 720
5494
+ },
5495
+ {
5496
+ "epoch": 0.9291237113402062,
5497
+ "grad_norm": 3.578312873840332,
5498
+ "learning_rate": 3.048353832176221e-07,
5499
+ "loss": 0.0234,
5500
+ "step": 721
5501
+ },
5502
+ {
5503
+ "epoch": 0.9304123711340206,
5504
+ "grad_norm": 0.2854086756706238,
5505
+ "learning_rate": 2.939053621702015e-07,
5506
+ "loss": 0.0011,
5507
+ "step": 722
5508
+ },
5509
+ {
5510
+ "epoch": 0.9317010309278351,
5511
+ "grad_norm": 2.446420907974243,
5512
+ "learning_rate": 2.83171963384995e-07,
5513
+ "loss": 0.0121,
5514
+ "step": 723
5515
+ },
5516
+ {
5517
+ "epoch": 0.9329896907216495,
5518
+ "grad_norm": 3.754582643508911,
5519
+ "learning_rate": 2.7263540429498747e-07,
5520
+ "loss": 0.0285,
5521
+ "step": 724
5522
+ },
5523
+ {
5524
+ "epoch": 0.9342783505154639,
5525
+ "grad_norm": 1.2466968297958374,
5526
+ "learning_rate": 2.6229589834566807e-07,
5527
+ "loss": 0.0334,
5528
+ "step": 725
5529
+ },
5530
+ {
5531
+ "epoch": 0.9355670103092784,
5532
+ "grad_norm": 4.552764415740967,
5533
+ "learning_rate": 2.5215365499069446e-07,
5534
+ "loss": 0.013,
5535
+ "step": 726
5536
+ },
5537
+ {
5538
+ "epoch": 0.9368556701030928,
5539
+ "grad_norm": 0.8621684312820435,
5540
+ "learning_rate": 2.4220887968765873e-07,
5541
+ "loss": 0.0022,
5542
+ "step": 727
5543
+ },
5544
+ {
5545
+ "epoch": 0.9381443298969072,
5546
+ "grad_norm": 3.7333528995513916,
5547
+ "learning_rate": 2.3246177389392388e-07,
5548
+ "loss": 0.0402,
5549
+ "step": 728
5550
+ },
5551
+ {
5552
+ "epoch": 0.9394329896907216,
5553
+ "grad_norm": 0.3883923590183258,
5554
+ "learning_rate": 2.229125350625394e-07,
5555
+ "loss": 0.0012,
5556
+ "step": 729
5557
+ },
5558
+ {
5559
+ "epoch": 0.9407216494845361,
5560
+ "grad_norm": 0.36370164155960083,
5561
+ "learning_rate": 2.1356135663824328e-07,
5562
+ "loss": 0.0017,
5563
+ "step": 730
5564
+ },
5565
+ {
5566
+ "epoch": 0.9420103092783505,
5567
+ "grad_norm": 1.1851253509521484,
5568
+ "learning_rate": 2.0440842805354522e-07,
5569
+ "loss": 0.0178,
5570
+ "step": 731
5571
+ },
5572
+ {
5573
+ "epoch": 0.9432989690721649,
5574
+ "grad_norm": 0.40920475125312805,
5575
+ "learning_rate": 1.9545393472488738e-07,
5576
+ "loss": 0.0024,
5577
+ "step": 732
5578
+ },
5579
+ {
5580
+ "epoch": 0.9445876288659794,
5581
+ "grad_norm": 1.3488298654556274,
5582
+ "learning_rate": 1.866980580488842e-07,
5583
+ "loss": 0.0269,
5584
+ "step": 733
5585
+ },
5586
+ {
5587
+ "epoch": 0.9458762886597938,
5588
+ "grad_norm": 1.7972975969314575,
5589
+ "learning_rate": 1.7814097539865626e-07,
5590
+ "loss": 0.0037,
5591
+ "step": 734
5592
+ },
5593
+ {
5594
+ "epoch": 0.9471649484536082,
5595
+ "grad_norm": 0.23618119955062866,
5596
+ "learning_rate": 1.6978286012023225e-07,
5597
+ "loss": 0.0013,
5598
+ "step": 735
5599
+ },
5600
+ {
5601
+ "epoch": 0.9484536082474226,
5602
+ "grad_norm": 0.15299421548843384,
5603
+ "learning_rate": 1.6162388152903498e-07,
5604
+ "loss": 0.0007,
5605
+ "step": 736
5606
+ },
5607
+ {
5608
+ "epoch": 0.9497422680412371,
5609
+ "grad_norm": 0.8923954963684082,
5610
+ "learning_rate": 1.5366420490645738e-07,
5611
+ "loss": 0.0033,
5612
+ "step": 737
5613
+ },
5614
+ {
5615
+ "epoch": 0.9510309278350515,
5616
+ "grad_norm": 0.06458217650651932,
5617
+ "learning_rate": 1.4590399149650769e-07,
5618
+ "loss": 0.0003,
5619
+ "step": 738
5620
+ },
5621
+ {
5622
+ "epoch": 0.9523195876288659,
5623
+ "grad_norm": 2.2554473876953125,
5624
+ "learning_rate": 1.3834339850254952e-07,
5625
+ "loss": 0.0097,
5626
+ "step": 739
5627
+ },
5628
+ {
5629
+ "epoch": 0.9536082474226805,
5630
+ "grad_norm": 1.226751685142517,
5631
+ "learning_rate": 1.309825790841146e-07,
5632
+ "loss": 0.0276,
5633
+ "step": 740
5634
+ },
5635
+ {
5636
+ "epoch": 0.9536082474226805,
5637
+ "eval_accuracy": 0.9980139026812314,
5638
+ "eval_f1": 0.9642857142857143,
5639
+ "eval_loss": 0.013158504851162434,
5640
+ "eval_precision": 0.9818181818181818,
5641
+ "eval_recall": 0.9473684210526315,
5642
+ "eval_runtime": 85.185,
5643
+ "eval_samples_per_second": 5.341,
5644
+ "eval_steps_per_second": 0.176,
5645
+ "step": 740
5646
+ },
5647
+ {
5648
+ "epoch": 0.9548969072164949,
5649
+ "grad_norm": 0.9453576803207397,
5650
+ "learning_rate": 1.2382168235379742e-07,
5651
+ "loss": 0.001,
5652
+ "step": 741
5653
+ },
5654
+ {
5655
+ "epoch": 0.9561855670103093,
5656
+ "grad_norm": 6.605914115905762,
5657
+ "learning_rate": 1.1686085337423991e-07,
5658
+ "loss": 0.0179,
5659
+ "step": 742
5660
+ },
5661
+ {
5662
+ "epoch": 0.9574742268041238,
5663
+ "grad_norm": 3.2699971199035645,
5664
+ "learning_rate": 1.1010023315518592e-07,
5665
+ "loss": 0.0252,
5666
+ "step": 743
5667
+ },
5668
+ {
5669
+ "epoch": 0.9587628865979382,
5670
+ "grad_norm": 2.245004415512085,
5671
+ "learning_rate": 1.0353995865063138e-07,
5672
+ "loss": 0.0321,
5673
+ "step": 744
5674
+ },
5675
+ {
5676
+ "epoch": 0.9600515463917526,
5677
+ "grad_norm": 0.08078460395336151,
5678
+ "learning_rate": 9.718016275604759e-08,
5679
+ "loss": 0.0005,
5680
+ "step": 745
5681
+ },
5682
+ {
5683
+ "epoch": 0.961340206185567,
5684
+ "grad_norm": 0.14461307227611542,
5685
+ "learning_rate": 9.10209743056889e-08,
5686
+ "loss": 0.0006,
5687
+ "step": 746
5688
+ },
5689
+ {
5690
+ "epoch": 0.9626288659793815,
5691
+ "grad_norm": 2.124976396560669,
5692
+ "learning_rate": 8.506251806997934e-08,
5693
+ "loss": 0.0429,
5694
+ "step": 747
5695
+ },
5696
+ {
5697
+ "epoch": 0.9639175257731959,
5698
+ "grad_norm": 0.512088418006897,
5699
+ "learning_rate": 7.930491475299229e-08,
5700
+ "loss": 0.0009,
5701
+ "step": 748
5702
+ },
5703
+ {
5704
+ "epoch": 0.9652061855670103,
5705
+ "grad_norm": 0.8166017532348633,
5706
+ "learning_rate": 7.37482809900003e-08,
5707
+ "loss": 0.002,
5708
+ "step": 749
5709
+ },
5710
+ {
5711
+ "epoch": 0.9664948453608248,
5712
+ "grad_norm": 0.9644703269004822,
5713
+ "learning_rate": 6.839272934511143e-08,
5714
+ "loss": 0.0045,
5715
+ "step": 750
5716
+ },
5717
+ {
5718
+ "epoch": 0.9677835051546392,
5719
+ "grad_norm": 0.47802242636680603,
5720
+ "learning_rate": 6.323836830899321e-08,
5721
+ "loss": 0.0015,
5722
+ "step": 751
5723
+ },
5724
+ {
5725
+ "epoch": 0.9690721649484536,
5726
+ "grad_norm": 4.951274871826172,
5727
+ "learning_rate": 5.828530229667228e-08,
5728
+ "loss": 0.0172,
5729
+ "step": 752
5730
+ },
5731
+ {
5732
+ "epoch": 0.970360824742268,
5733
+ "grad_norm": 0.19146917760372162,
5734
+ "learning_rate": 5.353363164541825e-08,
5735
+ "loss": 0.0011,
5736
+ "step": 753
5737
+ },
5738
+ {
5739
+ "epoch": 0.9716494845360825,
5740
+ "grad_norm": 1.4356184005737305,
5741
+ "learning_rate": 4.898345261271531e-08,
5742
+ "loss": 0.002,
5743
+ "step": 754
5744
+ },
5745
+ {
5746
+ "epoch": 0.9729381443298969,
5747
+ "grad_norm": 2.761564254760742,
5748
+ "learning_rate": 4.463485737430606e-08,
5749
+ "loss": 0.0068,
5750
+ "step": 755
5751
+ },
5752
+ {
5753
+ "epoch": 0.9742268041237113,
5754
+ "grad_norm": 3.234301805496216,
5755
+ "learning_rate": 4.0487934022328533e-08,
5756
+ "loss": 0.0142,
5757
+ "step": 756
5758
+ },
5759
+ {
5760
+ "epoch": 0.9755154639175257,
5761
+ "grad_norm": 2.400604248046875,
5762
+ "learning_rate": 3.654276656353206e-08,
5763
+ "loss": 0.0098,
5764
+ "step": 757
5765
+ },
5766
+ {
5767
+ "epoch": 0.9768041237113402,
5768
+ "grad_norm": 0.21227847039699554,
5769
+ "learning_rate": 3.27994349175742e-08,
5770
+ "loss": 0.0006,
5771
+ "step": 758
5772
+ },
5773
+ {
5774
+ "epoch": 0.9780927835051546,
5775
+ "grad_norm": 1.0701520442962646,
5776
+ "learning_rate": 2.9258014915399813e-08,
5777
+ "loss": 0.0013,
5778
+ "step": 759
5779
+ },
5780
+ {
5781
+ "epoch": 0.979381443298969,
5782
+ "grad_norm": 1.216143250465393,
5783
+ "learning_rate": 2.591857829770672e-08,
5784
+ "loss": 0.0042,
5785
+ "step": 760
5786
+ },
5787
+ {
5788
+ "epoch": 0.979381443298969,
5789
+ "eval_accuracy": 0.9980139026812314,
5790
+ "eval_f1": 0.9642857142857143,
5791
+ "eval_loss": 0.013023993000388145,
5792
+ "eval_precision": 0.9818181818181818,
5793
+ "eval_recall": 0.9473684210526315,
5794
+ "eval_runtime": 86.4332,
5795
+ "eval_samples_per_second": 5.264,
5796
+ "eval_steps_per_second": 0.174,
5797
+ "step": 760
5798
+ },
5799
+ {
5800
+ "epoch": 0.9806701030927835,
5801
+ "grad_norm": 3.9802327156066895,
5802
+ "learning_rate": 2.278119271349466e-08,
5803
+ "loss": 0.0287,
5804
+ "step": 761
5805
+ },
5806
+ {
5807
+ "epoch": 0.9819587628865979,
5808
+ "grad_norm": 1.6140069961547852,
5809
+ "learning_rate": 1.984592171869082e-08,
5810
+ "loss": 0.0041,
5811
+ "step": 762
5812
+ },
5813
+ {
5814
+ "epoch": 0.9832474226804123,
5815
+ "grad_norm": 0.06459134072065353,
5816
+ "learning_rate": 1.711282477486642e-08,
5817
+ "loss": 0.0003,
5818
+ "step": 763
5819
+ },
5820
+ {
5821
+ "epoch": 0.9845360824742269,
5822
+ "grad_norm": 0.7752759456634521,
5823
+ "learning_rate": 1.4581957248026579e-08,
5824
+ "loss": 0.0037,
5825
+ "step": 764
5826
+ },
5827
+ {
5828
+ "epoch": 0.9858247422680413,
5829
+ "grad_norm": 1.614241361618042,
5830
+ "learning_rate": 1.2253370407495636e-08,
5831
+ "loss": 0.0067,
5832
+ "step": 765
5833
+ },
5834
+ {
5835
+ "epoch": 0.9871134020618557,
5836
+ "grad_norm": 0.4276859760284424,
5837
+ "learning_rate": 1.0127111424872437e-08,
5838
+ "loss": 0.0019,
5839
+ "step": 766
5840
+ },
5841
+ {
5842
+ "epoch": 0.9884020618556701,
5843
+ "grad_norm": 3.409043788909912,
5844
+ "learning_rate": 8.203223373078883e-09,
5845
+ "loss": 0.016,
5846
+ "step": 767
5847
+ },
5848
+ {
5849
+ "epoch": 0.9896907216494846,
5850
+ "grad_norm": 0.6996654868125916,
5851
+ "learning_rate": 6.481745225485059e-09,
5852
+ "loss": 0.0018,
5853
+ "step": 768
5854
+ },
5855
+ {
5856
+ "epoch": 0.990979381443299,
5857
+ "grad_norm": 0.7112312912940979,
5858
+ "learning_rate": 4.962711855120983e-09,
5859
+ "loss": 0.0026,
5860
+ "step": 769
5861
+ },
5862
+ {
5863
+ "epoch": 0.9922680412371134,
5864
+ "grad_norm": 4.281740665435791,
5865
+ "learning_rate": 3.6461540339682855e-09,
5866
+ "loss": 0.0217,
5867
+ "step": 770
5868
+ },
5869
+ {
5870
+ "epoch": 0.9935567010309279,
5871
+ "grad_norm": 0.22678984701633453,
5872
+ "learning_rate": 2.532098432341812e-09,
5873
+ "loss": 0.0009,
5874
+ "step": 771
5875
+ },
5876
+ {
5877
+ "epoch": 0.9948453608247423,
5878
+ "grad_norm": 2.139557123184204,
5879
+ "learning_rate": 1.6205676183411733e-09,
5880
+ "loss": 0.0065,
5881
+ "step": 772
5882
+ },
5883
+ {
5884
+ "epoch": 0.9961340206185567,
5885
+ "grad_norm": 1.8708549737930298,
5886
+ "learning_rate": 9.115800574022171e-10,
5887
+ "loss": 0.0096,
5888
+ "step": 773
5889
+ },
5890
+ {
5891
+ "epoch": 0.9974226804123711,
5892
+ "grad_norm": 1.9189296960830688,
5893
+ "learning_rate": 4.0515011191621933e-10,
5894
+ "loss": 0.0148,
5895
+ "step": 774
5896
+ },
5897
+ {
5898
+ "epoch": 0.9987113402061856,
5899
+ "grad_norm": 1.2406359910964966,
5900
+ "learning_rate": 1.0128804094233779e-10,
5901
+ "loss": 0.0034,
5902
+ "step": 775
5903
+ },
5904
+ {
5905
+ "epoch": 1.0,
5906
+ "grad_norm": 2.146103858947754,
5907
+ "learning_rate": 0.0,
5908
+ "loss": 0.0056,
5909
+ "step": 776
5910
  }
5911
  ],
5912
  "logging_steps": 1,
 
5921
  "should_evaluate": false,
5922
  "should_log": false,
5923
  "should_save": true,
5924
+ "should_training_stop": true
5925
  },
5926
  "attributes": {}
5927
  }
5928
  },
5929
+ "total_flos": 2.5857289592242176e+17,
5930
  "train_batch_size": 8,
5931
  "trial_name": null,
5932
  "trial_params": null