ToastyPigeon commited on
Commit
9ee62ee
·
verified ·
1 Parent(s): 0431096

Training in progress, step 273, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step273/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step273/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step273/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step273/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step273/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step273/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step273/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step273/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step273/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step273/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step273/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step273/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step273/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step273/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step273/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step273/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1990c2d68f0e2e44d28f9a5ee2378d9cee7f403a3934fca2a1cb4385e8f66fc6
3
  size 550593856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a53baa7fb5e46eac3e86e83348e470e62e4cdc1131a11ff07d700e96aad64796
3
  size 550593856
last-checkpoint/global_step273/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a239c8f5eeffc87a3b8e84887a8e05754968385fd63a5d952453946899e284
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd156f4f78904d80425ea079796a91220c0b68adb7c6e56666df77f5e7f8b0de
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34f55e9e2f19392698a61fe9dab747c965188add3ca8fec5c6dceab18965c9c1
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ed25b7b5f856c355e47d3ef5260dbe1eea9cd39bf20021965f1afdc484fa415
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb8d8735dd5765734ff469a79c7e07d58d7fa443834ecf1894859683a69e988
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bea816c3f2a08032ef80be20f31cbe8d5da66d4d6f61409b08fedf474ef878b
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a3e5bbecc6e372d25f4f43f2a7df3c06ad9c581bd6f4fae1bf283e553e6c724
3
+ size 243591168
last-checkpoint/global_step273/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:307d63839128477e9f83d4c253d822d531bd8b174e46071c70f491619a38a8d6
3
+ size 243591168
last-checkpoint/global_step273/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72123919b4ca6ac83b03bad1a1a6a6f896c387dbb5c8e566d9b98a18293024a8
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9e7ba5ee59bbce7b1d7a8369741b614c14992dc1f12810792d6fe8248cad3f
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:943702d997113425be1f61993218522594c7d4297944a57a576aed739483a681
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:febb76d47320eb49ed6bcaa76d04f022e91034f9d1abf3336cc1fe70cd1c8d34
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6884ccf280ecc14775a811ad4b58c64cda56b638b1380f3ede0ae967c22dd065
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0955da2b84b95532c43e619af600459d2cce507a9d146c6e1d2cfc12696c3550
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:715cc62fb431d81ab58761b15cb5630539e18a58a338cc4d1e7cebf7023bd47d
3
+ size 211435686
last-checkpoint/global_step273/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e573d32f57b269bdeb3b3d8e102ab36d5da34a30c184b2e90e1a4da387671078
3
+ size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step234
 
1
+ global_step273
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134a1f059f52a9bdf9df082d6896f7d8b5760d4d52176fa82dfbe01a23fb87c9
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5a01ac495960a4bdf4e8e3d767478ddeae6f6e48a0785c78a39b02d9f03944
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92a620eacd8f4cd828cfb1aebe552d5e8dae8821e0f453da23766cd3cb0fc809
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94a997267e4e5e22741250c431d4119f75a51dd8d32f691af3d6ddfcdd72fb96
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a162c9df7047160908baee7101046d35e92bb1a1356ac6f65730b0b0abb6d169
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e4fe0ad3a692bdc4f5dd7af7febcbaf52826f91569d816d162d70c3d5aae57
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9712a619d6aa46296d8a883ec08e9d1c3e69b8f47bb56e4a362c61af346d5c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5efce2552c4a4695a29bf25d9f63b6643d4a8ee75838c6e78f968cfeb77ced6
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c3be75b246d7517b2ce93c5ea356d84fa1126631a1aa584e4174a6a45fb01f8
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a6db6f4ce94d4baff585d4bf8aaabf2351a80d5b9ea39e0f01f54a07f8cc7d
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2cd295e7ee2a480367b2e688de7637a1d212d602ed9ac1e83ce463138ef7d19
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098920af0acbca97e22a72d86dd88a3b5fb1ee4a312e7aa98369fc3e28978653
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:393dcff3659995d822f8e1744b724bc57b6986bec073c96de798eb021d6f55e4
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e14f5793871a938571a938379ee73acad2b8bbc41260bdbfeeef799929af076
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f042c259ed788084e3425e22e2364144fa3d72b73d1f475b32393e7b99cb156c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70ca795c779e38cba23bef63d56eb87a6d53fdb68031316ac8452b61a60aa3b
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:366054962264279f0b5e5302d033c378e5387597c8a92a96d8dbf2d43d872448
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f96a436204d535377b5df836584c2a61915d36dc9059dc3240944efd6133bb4f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.609375,
5
  "eval_steps": 39,
6
- "global_step": 234,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1701,6 +1701,287 @@
1701
  "eval_samples_per_second": 1.225,
1702
  "eval_steps_per_second": 0.153,
1703
  "step": 234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  }
1705
  ],
1706
  "logging_steps": 1,
@@ -1720,7 +2001,7 @@
1720
  "attributes": {}
1721
  }
1722
  },
1723
- "total_flos": 77355313790976.0,
1724
  "train_batch_size": 1,
1725
  "trial_name": null,
1726
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7109375,
5
  "eval_steps": 39,
6
+ "global_step": 273,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1701
  "eval_samples_per_second": 1.225,
1702
  "eval_steps_per_second": 0.153,
1703
  "step": 234
1704
+ },
1705
+ {
1706
+ "epoch": 0.6119791666666666,
1707
+ "grad_norm": 0.15343655698839206,
1708
+ "learning_rate": 4.235591624451763e-05,
1709
+ "loss": 2.5605,
1710
+ "step": 235
1711
+ },
1712
+ {
1713
+ "epoch": 0.6145833333333334,
1714
+ "grad_norm": 0.14304569592672528,
1715
+ "learning_rate": 4.198365462085446e-05,
1716
+ "loss": 2.5812,
1717
+ "step": 236
1718
+ },
1719
+ {
1720
+ "epoch": 0.6171875,
1721
+ "grad_norm": 0.15320835548074707,
1722
+ "learning_rate": 4.161236257651587e-05,
1723
+ "loss": 2.5275,
1724
+ "step": 237
1725
+ },
1726
+ {
1727
+ "epoch": 0.6197916666666666,
1728
+ "grad_norm": 0.15452569712518102,
1729
+ "learning_rate": 4.1242067768811134e-05,
1730
+ "loss": 2.4707,
1731
+ "step": 238
1732
+ },
1733
+ {
1734
+ "epoch": 0.6223958333333334,
1735
+ "grad_norm": 0.16320750887974791,
1736
+ "learning_rate": 4.0872797780765946e-05,
1737
+ "loss": 2.3996,
1738
+ "step": 239
1739
+ },
1740
+ {
1741
+ "epoch": 0.625,
1742
+ "grad_norm": 0.14194397858802227,
1743
+ "learning_rate": 4.0504580119067933e-05,
1744
+ "loss": 2.2431,
1745
+ "step": 240
1746
+ },
1747
+ {
1748
+ "epoch": 0.6276041666666666,
1749
+ "grad_norm": 0.1437781011206105,
1750
+ "learning_rate": 4.01374422120175e-05,
1751
+ "loss": 2.3929,
1752
+ "step": 241
1753
+ },
1754
+ {
1755
+ "epoch": 0.6302083333333334,
1756
+ "grad_norm": 0.14383943780586664,
1757
+ "learning_rate": 3.977141140748484e-05,
1758
+ "loss": 2.3989,
1759
+ "step": 242
1760
+ },
1761
+ {
1762
+ "epoch": 0.6328125,
1763
+ "grad_norm": 0.16948819133813695,
1764
+ "learning_rate": 3.94065149708728e-05,
1765
+ "loss": 2.4256,
1766
+ "step": 243
1767
+ },
1768
+ {
1769
+ "epoch": 0.6354166666666666,
1770
+ "grad_norm": 0.14755837584042042,
1771
+ "learning_rate": 3.904278008308589e-05,
1772
+ "loss": 2.2711,
1773
+ "step": 244
1774
+ },
1775
+ {
1776
+ "epoch": 0.6380208333333334,
1777
+ "grad_norm": 0.15519908616035058,
1778
+ "learning_rate": 3.868023383850556e-05,
1779
+ "loss": 2.4623,
1780
+ "step": 245
1781
+ },
1782
+ {
1783
+ "epoch": 0.640625,
1784
+ "grad_norm": 0.16021538951276384,
1785
+ "learning_rate": 3.831890324297197e-05,
1786
+ "loss": 2.3857,
1787
+ "step": 246
1788
+ },
1789
+ {
1790
+ "epoch": 0.6432291666666666,
1791
+ "grad_norm": 0.15886505001684953,
1792
+ "learning_rate": 3.795881521177236e-05,
1793
+ "loss": 2.5196,
1794
+ "step": 247
1795
+ },
1796
+ {
1797
+ "epoch": 0.6458333333333334,
1798
+ "grad_norm": 0.16005244689800305,
1799
+ "learning_rate": 3.7599996567636156e-05,
1800
+ "loss": 2.406,
1801
+ "step": 248
1802
+ },
1803
+ {
1804
+ "epoch": 0.6484375,
1805
+ "grad_norm": 0.1470078515999776,
1806
+ "learning_rate": 3.724247403873694e-05,
1807
+ "loss": 2.4975,
1808
+ "step": 249
1809
+ },
1810
+ {
1811
+ "epoch": 0.6510416666666666,
1812
+ "grad_norm": 0.1560311928142992,
1813
+ "learning_rate": 3.688627425670147e-05,
1814
+ "loss": 2.374,
1815
+ "step": 250
1816
+ },
1817
+ {
1818
+ "epoch": 0.6536458333333334,
1819
+ "grad_norm": 0.15349615074542047,
1820
+ "learning_rate": 3.653142375462596e-05,
1821
+ "loss": 2.4155,
1822
+ "step": 251
1823
+ },
1824
+ {
1825
+ "epoch": 0.65625,
1826
+ "grad_norm": 0.20641136824203335,
1827
+ "learning_rate": 3.6177948965099585e-05,
1828
+ "loss": 2.4358,
1829
+ "step": 252
1830
+ },
1831
+ {
1832
+ "epoch": 0.6588541666666666,
1833
+ "grad_norm": 0.14326347914064022,
1834
+ "learning_rate": 3.582587621823558e-05,
1835
+ "loss": 2.4528,
1836
+ "step": 253
1837
+ },
1838
+ {
1839
+ "epoch": 0.6614583333333334,
1840
+ "grad_norm": 0.13097442902507145,
1841
+ "learning_rate": 3.547523173970989e-05,
1842
+ "loss": 2.3682,
1843
+ "step": 254
1844
+ },
1845
+ {
1846
+ "epoch": 0.6640625,
1847
+ "grad_norm": 0.14938841182330287,
1848
+ "learning_rate": 3.51260416488077e-05,
1849
+ "loss": 2.5273,
1850
+ "step": 255
1851
+ },
1852
+ {
1853
+ "epoch": 0.6666666666666666,
1854
+ "grad_norm": 0.1532472206020292,
1855
+ "learning_rate": 3.477833195647773e-05,
1856
+ "loss": 2.5301,
1857
+ "step": 256
1858
+ },
1859
+ {
1860
+ "epoch": 0.6692708333333334,
1861
+ "grad_norm": 0.12783586158700921,
1862
+ "learning_rate": 3.443212856339481e-05,
1863
+ "loss": 2.3279,
1864
+ "step": 257
1865
+ },
1866
+ {
1867
+ "epoch": 0.671875,
1868
+ "grad_norm": 0.15547617846106007,
1869
+ "learning_rate": 3.408745725803042e-05,
1870
+ "loss": 2.4209,
1871
+ "step": 258
1872
+ },
1873
+ {
1874
+ "epoch": 0.6744791666666666,
1875
+ "grad_norm": 0.13531260542176757,
1876
+ "learning_rate": 3.3744343714731835e-05,
1877
+ "loss": 2.3595,
1878
+ "step": 259
1879
+ },
1880
+ {
1881
+ "epoch": 0.6770833333333334,
1882
+ "grad_norm": 0.1488109854224464,
1883
+ "learning_rate": 3.3402813491809623e-05,
1884
+ "loss": 2.2631,
1885
+ "step": 260
1886
+ },
1887
+ {
1888
+ "epoch": 0.6796875,
1889
+ "grad_norm": 0.1753652780376821,
1890
+ "learning_rate": 3.3062892029633817e-05,
1891
+ "loss": 2.4748,
1892
+ "step": 261
1893
+ },
1894
+ {
1895
+ "epoch": 0.6822916666666666,
1896
+ "grad_norm": 0.1593205802273226,
1897
+ "learning_rate": 3.272460464873884e-05,
1898
+ "loss": 2.4484,
1899
+ "step": 262
1900
+ },
1901
+ {
1902
+ "epoch": 0.6848958333333334,
1903
+ "grad_norm": 0.1401885264986934,
1904
+ "learning_rate": 3.238797654793752e-05,
1905
+ "loss": 2.4234,
1906
+ "step": 263
1907
+ },
1908
+ {
1909
+ "epoch": 0.6875,
1910
+ "grad_norm": 0.15910838717602993,
1911
+ "learning_rate": 3.205303280244389e-05,
1912
+ "loss": 2.4679,
1913
+ "step": 264
1914
+ },
1915
+ {
1916
+ "epoch": 0.6901041666666666,
1917
+ "grad_norm": 0.16179488061734165,
1918
+ "learning_rate": 3.1719798362005444e-05,
1919
+ "loss": 2.4883,
1920
+ "step": 265
1921
+ },
1922
+ {
1923
+ "epoch": 0.6927083333333334,
1924
+ "grad_norm": 0.15848452909780508,
1925
+ "learning_rate": 3.138829804904464e-05,
1926
+ "loss": 2.4583,
1927
+ "step": 266
1928
+ },
1929
+ {
1930
+ "epoch": 0.6953125,
1931
+ "grad_norm": 0.16307212504652477,
1932
+ "learning_rate": 3.105855655680986e-05,
1933
+ "loss": 2.3327,
1934
+ "step": 267
1935
+ },
1936
+ {
1937
+ "epoch": 0.6979166666666666,
1938
+ "grad_norm": 0.1467535420565889,
1939
+ "learning_rate": 3.073059844753604e-05,
1940
+ "loss": 2.4382,
1941
+ "step": 268
1942
+ },
1943
+ {
1944
+ "epoch": 0.7005208333333334,
1945
+ "grad_norm": 0.14201414432531673,
1946
+ "learning_rate": 3.0404448150615063e-05,
1947
+ "loss": 2.3501,
1948
+ "step": 269
1949
+ },
1950
+ {
1951
+ "epoch": 0.703125,
1952
+ "grad_norm": 0.1549923554458448,
1953
+ "learning_rate": 3.0080129960776017e-05,
1954
+ "loss": 2.396,
1955
+ "step": 270
1956
+ },
1957
+ {
1958
+ "epoch": 0.7057291666666666,
1959
+ "grad_norm": 0.1524034184779795,
1960
+ "learning_rate": 2.9757668036275477e-05,
1961
+ "loss": 2.2784,
1962
+ "step": 271
1963
+ },
1964
+ {
1965
+ "epoch": 0.7083333333333334,
1966
+ "grad_norm": 0.16256677012412982,
1967
+ "learning_rate": 2.9437086397097995e-05,
1968
+ "loss": 2.3027,
1969
+ "step": 272
1970
+ },
1971
+ {
1972
+ "epoch": 0.7109375,
1973
+ "grad_norm": 0.15633557288864075,
1974
+ "learning_rate": 2.9118408923166875e-05,
1975
+ "loss": 2.5473,
1976
+ "step": 273
1977
+ },
1978
+ {
1979
+ "epoch": 0.7109375,
1980
+ "eval_loss": 2.3996334075927734,
1981
+ "eval_runtime": 65.6177,
1982
+ "eval_samples_per_second": 1.219,
1983
+ "eval_steps_per_second": 0.152,
1984
+ "step": 273
1985
  }
1986
  ],
1987
  "logging_steps": 1,
 
2001
  "attributes": {}
2002
  }
2003
  },
2004
+ "total_flos": 90247866089472.0,
2005
  "train_batch_size": 1,
2006
  "trial_name": null,
2007
  "trial_params": null