ToastyPigeon committed
Commit 750ee8a · verified · 1 Parent(s): 9eac6cb

Training in progress, step 234, checkpoint

Files changed (28)
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step234/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step234/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step234/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step234/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step234/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step234/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step234/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step234/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step234/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step234/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step234/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step234/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step234/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step234/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step234/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step234/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +284 -3
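
Note: the global_step234/ files listed above are DeepSpeed ZeRO shards, one bf16 optimizer-state file and one model-state file per data-parallel rank (8 ranks here). As a minimal sketch only, assuming the deepspeed package is installed, last-checkpoint/ has been downloaded locally, and DeepSpeed's zero_to_fp32 helpers are available (their exact location can vary by version), the shards can typically be consolidated into a single fp32 state dict:

```python
# Illustrative sketch, not part of this commit: merge the per-rank ZeRO shards
# under last-checkpoint/global_step234/ back into one fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

checkpoint_dir = "last-checkpoint"                      # assumes a local download of this repo folder
tag = open(f"{checkpoint_dir}/latest").read().strip()   # "global_step234" per the diff below

# Reads the *_optim_states.pt / *_model_states.pt shards for all 8 ranks and
# reassembles full fp32 parameters on CPU, keyed by parameter name.
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=tag)
print(f"reconstructed {len(state_dict)} tensors")
```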
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:042b65e1dac882ac0093711ff9d1ed6dec458676188c7d4000affb0485375140
+oid sha256:1990c2d68f0e2e44d28f9a5ee2378d9cee7f403a3934fca2a1cb4385e8f66fc6
 size 550593856
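
The three lines in each pointer follow the Git LFS v1 spec: the spec URL, the SHA-256 of the stored object, and its size in bytes. As an illustration only (the path below assumes a local download), a fetched copy of the adapter can be verified against the new pointer:

```python
# Illustrative only: check a downloaded file against the LFS pointer fields above.
import hashlib
import os

path = "last-checkpoint/adapter_model.safetensors"   # assumed local copy
expected_oid = "1990c2d68f0e2e44d28f9a5ee2378d9cee7f403a3934fca2a1cb4385e8f66fc6"
expected_size = 550593856

assert os.path.getsize(path) == expected_size, "size mismatch with LFS pointer"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):   # hash in 1 MiB chunks
        sha256.update(chunk)
assert sha256.hexdigest() == expected_oid, "oid mismatch with LFS pointer"
```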
last-checkpoint/global_step234/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3978781575f21b035f8ba8f5cdf0926bb0c9639ee8f3b475d8beff9f5ff762e8
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:331c79fad78e2018dd5ab702370191c1ba9d76ceed7df33831618cc74f51f139
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a829b15f8ce7aaccd2aa9e7b413a1499fc9117e65bfbd4bb0e0c9b4e754dc6cf
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10a8a270568d77ff77bb3b06638111e8dd5762cf5a7c0da42dfa357b3f339c0c
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae710fc3816397c00617977eda0f3a0869a72696f4ff8ca9f199140b6659fdd4
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a54dfa0369915dc9f6c5e05306751bf6cb81a0a30b988dca22cc6afc97b88006
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e0d94fa68689faccd4069ec0129ff3aa6cea98e66d110d55969e8f100aae0e
+size 243590464
last-checkpoint/global_step234/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af78a0696566f11c7acc705d8bdbfb9401d1b103365d1f956043384ba4abe78c
+size 243590464
last-checkpoint/global_step234/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f5286b6bf1b06e4faeea7783920020d7f9e730ae75c99d086ddef52daa56896
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb304e1edba2ada327ea07b04d195bedc51426cefec33d8fc89c986365cae1f7
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c931d02bb03ff9aaf2141b57d64494b21d048147ffc41c2849d75053183d16a6
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdfccbfd9f4bc00b40475ea66d1c49501eef8d5d9d7b379fad1a3a892588c3f0
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cedc996b57fd1e9b2c353c6fa9dc2a23aca3db52ec66f40e0ca7cda820a9955b
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fccd3cfabeb6bedcd9ba9e760f04df590e889b9c525639d449c227212f3d8661
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a2173d0821780144beac4cd92441e2d47bd4ff489e6bcb2522b3dcb5f2fd26
+size 211435686
last-checkpoint/global_step234/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c6f252a007f965b4fe85d1dfef586386bb45f891dcb9d2af9f4785643c81e94
+size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step195
+global_step234
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0d205b247ab17b5fb7452990cd269cf4ef10bda2683097ddce6af8c135abdc1
+oid sha256:134a1f059f52a9bdf9df082d6896f7d8b5760d4d52176fa82dfbe01a23fb87c9
 size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61ca1bf98495958499a22cade70d63eaabfdcb012a2f66af4f5e7d4414626392
+oid sha256:92a620eacd8f4cd828cfb1aebe552d5e8dae8821e0f453da23766cd3cb0fc809
 size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4071ea30f675614d9bcaeb751a6df81010828e93a461dc017bb8c2d1790fd8b3
+oid sha256:a162c9df7047160908baee7101046d35e92bb1a1356ac6f65730b0b0abb6d169
 size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:122476c594e95007c2b6aee18c853f5e96679ed35da34b2a71dda0a35dffb9b3
+oid sha256:2f9712a619d6aa46296d8a883ec08e9d1c3e69b8f47bb56e4a362c61af346d5c
 size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f61b284b2b274f92f376a2e95925d0cbb78a5ed491b1f17dff855d4053e0449d
+oid sha256:4c3be75b246d7517b2ce93c5ea356d84fa1126631a1aa584e4174a6a45fb01f8
 size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ee5173863e2cb2c91670d300a1232c7136aeb3cbb8e8c415e2b561ed21531c1
+oid sha256:b2cd295e7ee2a480367b2e688de7637a1d212d602ed9ac1e83ce463138ef7d19
 size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cae86fdfe88f89628e7a5f8ad48bf1ed1417d375ea504c7851d524b56445ed89
+oid sha256:393dcff3659995d822f8e1744b724bc57b6986bec073c96de798eb021d6f55e4
 size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb21013bed59d364e3b186bc3199360efcdc8144f2dd2d2b123e56a645623fe2
+oid sha256:f042c259ed788084e3425e22e2364144fa3d72b73d1f475b32393e7b99cb156c
 size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4b13e35de6391f1bb48cdfbaa998c2879cd015ccd85e1b48a62d504eed8a347
+oid sha256:366054962264279f0b5e5302d033c378e5387597c8a92a96d8dbf2d43d872448
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5078125,
+  "epoch": 0.609375,
   "eval_steps": 39,
-  "global_step": 195,
+  "global_step": 234,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1420,6 +1420,287 @@
       "eval_samples_per_second": 1.225,
       "eval_steps_per_second": 0.153,
       "step": 195
+    },
+    {
+      "epoch": 0.5104166666666666,
+      "grad_norm": 0.15606553454493674,
+      "learning_rate": 5.7329260879789437e-05,
+      "loss": 2.3709,
+      "step": 196
+    },
+    {
+      "epoch": 0.5130208333333334,
+      "grad_norm": 0.1448236972696976,
+      "learning_rate": 5.6941315870408066e-05,
+      "loss": 2.4256,
+      "step": 197
+    },
+    {
+      "epoch": 0.515625,
+      "grad_norm": 0.15924118657730604,
+      "learning_rate": 5.655322625363687e-05,
+      "loss": 2.5228,
+      "step": 198
+    },
+    {
+      "epoch": 0.5182291666666666,
+      "grad_norm": 0.17454487852950232,
+      "learning_rate": 5.616502093802565e-05,
+      "loss": 2.2945,
+      "step": 199
+    },
+    {
+      "epoch": 0.5208333333333334,
+      "grad_norm": 0.14248471717781994,
+      "learning_rate": 5.577672884074249e-05,
+      "loss": 2.3514,
+      "step": 200
+    },
+    {
+      "epoch": 0.5234375,
+      "grad_norm": 0.14873123082787126,
+      "learning_rate": 5.538837888541986e-05,
+      "loss": 2.4918,
+      "step": 201
+    },
+    {
+      "epoch": 0.5260416666666666,
+      "grad_norm": 0.1600074911775423,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 2.3231,
+      "step": 202
+    },
+    {
+      "epoch": 0.5286458333333334,
+      "grad_norm": 0.15123679319262007,
+      "learning_rate": 5.461162111458016e-05,
+      "loss": 2.3613,
+      "step": 203
+    },
+    {
+      "epoch": 0.53125,
+      "grad_norm": 0.13067031369476279,
+      "learning_rate": 5.422327115925753e-05,
+      "loss": 2.3407,
+      "step": 204
+    },
+    {
+      "epoch": 0.5338541666666666,
+      "grad_norm": 0.1419685152456832,
+      "learning_rate": 5.3834979061974376e-05,
+      "loss": 2.4375,
+      "step": 205
+    },
+    {
+      "epoch": 0.5364583333333334,
+      "grad_norm": 0.13456464948427768,
+      "learning_rate": 5.3446773746363153e-05,
+      "loss": 2.3542,
+      "step": 206
+    },
+    {
+      "epoch": 0.5390625,
+      "grad_norm": 0.1394757337185396,
+      "learning_rate": 5.305868412959195e-05,
+      "loss": 2.6189,
+      "step": 207
+    },
+    {
+      "epoch": 0.5416666666666666,
+      "grad_norm": 0.14661264347416875,
+      "learning_rate": 5.2670739120210574e-05,
+      "loss": 2.4666,
+      "step": 208
+    },
+    {
+      "epoch": 0.5442708333333334,
+      "grad_norm": 0.15772002651226175,
+      "learning_rate": 5.2282967615997125e-05,
+      "loss": 2.2665,
+      "step": 209
+    },
+    {
+      "epoch": 0.546875,
+      "grad_norm": 0.1436207432226202,
+      "learning_rate": 5.1895398501805383e-05,
+      "loss": 2.4412,
+      "step": 210
+    },
+    {
+      "epoch": 0.5494791666666666,
+      "grad_norm": 0.1627045485383769,
+      "learning_rate": 5.150806064741323e-05,
+      "loss": 2.4241,
+      "step": 211
+    },
+    {
+      "epoch": 0.5520833333333334,
+      "grad_norm": 0.1608053633868765,
+      "learning_rate": 5.112098290537213e-05,
+      "loss": 2.2647,
+      "step": 212
+    },
+    {
+      "epoch": 0.5546875,
+      "grad_norm": 0.15608204954248603,
+      "learning_rate": 5.0734194108858e-05,
+      "loss": 2.4768,
+      "step": 213
+    },
+    {
+      "epoch": 0.5572916666666666,
+      "grad_norm": 0.1576523104437354,
+      "learning_rate": 5.0347723069523355e-05,
+      "loss": 2.2488,
+      "step": 214
+    },
+    {
+      "epoch": 0.5598958333333334,
+      "grad_norm": 0.16267311756196431,
+      "learning_rate": 4.9961598575351155e-05,
+      "loss": 2.2745,
+      "step": 215
+    },
+    {
+      "epoch": 0.5625,
+      "grad_norm": 0.14597656332784,
+      "learning_rate": 4.9575849388510473e-05,
+      "loss": 2.2226,
+      "step": 216
+    },
+    {
+      "epoch": 0.5651041666666666,
+      "grad_norm": 0.12952717920836662,
+      "learning_rate": 4.919050424321395e-05,
+      "loss": 2.2863,
+      "step": 217
+    },
+    {
+      "epoch": 0.5677083333333334,
+      "grad_norm": 0.14126095486957518,
+      "learning_rate": 4.880559184357745e-05,
+      "loss": 2.2773,
+      "step": 218
+    },
+    {
+      "epoch": 0.5703125,
+      "grad_norm": 0.15894975244045567,
+      "learning_rate": 4.842114086148185e-05,
+      "loss": 2.4144,
+      "step": 219
+    },
+    {
+      "epoch": 0.5729166666666666,
+      "grad_norm": 0.15187930552412898,
+      "learning_rate": 4.803717993443734e-05,
+      "loss": 2.2729,
+      "step": 220
+    },
+    {
+      "epoch": 0.5755208333333334,
+      "grad_norm": 0.15567668429445186,
+      "learning_rate": 4.765373766345028e-05,
+      "loss": 2.4327,
+      "step": 221
+    },
+    {
+      "epoch": 0.578125,
+      "grad_norm": 0.1577838508745425,
+      "learning_rate": 4.727084261089257e-05,
+      "loss": 2.5534,
+      "step": 222
+    },
+    {
+      "epoch": 0.5807291666666666,
+      "grad_norm": 0.1447851641278385,
+      "learning_rate": 4.688852329837424e-05,
+      "loss": 2.2928,
+      "step": 223
+    },
+    {
+      "epoch": 0.5833333333333334,
+      "grad_norm": 0.13963925711509403,
+      "learning_rate": 4.6506808204618754e-05,
+      "loss": 2.3612,
+      "step": 224
+    },
+    {
+      "epoch": 0.5859375,
+      "grad_norm": 0.17944687510007068,
+      "learning_rate": 4.612572576334171e-05,
+      "loss": 2.4195,
+      "step": 225
+    },
+    {
+      "epoch": 0.5885416666666666,
+      "grad_norm": 0.15388075071309162,
+      "learning_rate": 4.574530436113286e-05,
+      "loss": 2.5277,
+      "step": 226
+    },
+    {
+      "epoch": 0.5911458333333334,
+      "grad_norm": 0.12718481374460847,
+      "learning_rate": 4.536557233534153e-05,
+      "loss": 2.2991,
+      "step": 227
+    },
+    {
+      "epoch": 0.59375,
+      "grad_norm": 0.14948263293785125,
+      "learning_rate": 4.498655797196586e-05,
+      "loss": 2.4404,
+      "step": 228
+    },
+    {
+      "epoch": 0.5963541666666666,
+      "grad_norm": 0.13608773847019048,
+      "learning_rate": 4.460828950354577e-05,
+      "loss": 2.2945,
+      "step": 229
+    },
+    {
+      "epoch": 0.5989583333333334,
+      "grad_norm": 0.14126010771163286,
+      "learning_rate": 4.423079510705992e-05,
+      "loss": 2.2835,
+      "step": 230
+    },
+    {
+      "epoch": 0.6015625,
+      "grad_norm": 0.15458611014177978,
+      "learning_rate": 4.3854102901826834e-05,
+      "loss": 2.4491,
+      "step": 231
+    },
+    {
+      "epoch": 0.6041666666666666,
+      "grad_norm": 0.17078246755698387,
+      "learning_rate": 4.3478240947410386e-05,
+      "loss": 2.5331,
+      "step": 232
+    },
+    {
+      "epoch": 0.6067708333333334,
+      "grad_norm": 0.14807435693469648,
+      "learning_rate": 4.3103237241529506e-05,
+      "loss": 2.4154,
+      "step": 233
+    },
+    {
+      "epoch": 0.609375,
+      "grad_norm": 0.15605308853260608,
+      "learning_rate": 4.272911971797279e-05,
+      "loss": 2.3354,
+      "step": 234
+    },
+    {
+      "epoch": 0.609375,
+      "eval_loss": 2.401761531829834,
+      "eval_runtime": 65.3141,
+      "eval_samples_per_second": 1.225,
+      "eval_steps_per_second": 0.153,
+      "step": 234
     }
   ],
   "logging_steps": 1,
@@ -1439,7 +1720,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 64462761492480.0,
+  "total_flos": 77355313790976.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null