ToastyPigeon commited on
Commit
4af078f
·
verified ·
1 Parent(s): 3ffdf7d

Training in progress, step 312, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step312/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step312/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step312/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step312/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step312/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step312/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step312/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step312/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step312/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step312/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step312/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step312/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step312/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step312/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step312/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step312/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a53baa7fb5e46eac3e86e83348e470e62e4cdc1131a11ff07d700e96aad64796
3
  size 550593856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0809bafbece4702b690076073d24e954afb4de759d2258a07d2ced1ca7d828ec
3
  size 550593856
last-checkpoint/global_step312/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0d1538a61a164214107b0478003fc7fe39f4b21af09842ff4667318c6f8c42
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab06953eca0473b7ed4c3b4cb2ab944690278d6dab7dc32497d563e01703364
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2f5e16650e0d332f097e79c7b2e5e170c09e29c6bc8264438b8c6182d2fc86
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ab7f608f2cc4f4990bc4565b243fca81c104bc82da093b430561fbf8397a45
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1512d67803b70b8c6e516e391a8e27d72fa15fe4557175fbd81b72b26f1e1b13
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feca9723f52a0665e22846e418b1ae8363359f6bb159d3e949b55a35d13a379b
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f76b90fd27fa0aabe7f9e20374762c64ecd730d2d8f61394240ead24469b4a8
3
+ size 243591168
last-checkpoint/global_step312/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a165222c592a47de1188f5456a9da5333cfc27abff9601ed632c659fa35aa595
3
+ size 243591168
last-checkpoint/global_step312/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9c0c74726cedbf18c1d4e8d3387e46076e7ba3248cb8b27d082370880a92f0
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:535511728ceae218960da1541b6b195dfab221e29b07df3fd5dde22d243e4405
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62479d721765bb3d3f114bb571ab169f7e8a8f35a1a46c7af67e286e9b5fa40
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47b15bfd73ce17131b40411120731e565c27bd2f39df341cf03cc3dd576ee8d1
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0794817ee85f313991856c88ad8f1417c110279153fa9a568829443e1aec7e5e
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8928533430fd1b9e926f4ac94549434d3893aa73a01c292c023f7b4cedc3e0db
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7617fff71907dd755d7475b737b427cef5dc3c3bdb4ac7b809a25e7bc99873ac
3
+ size 211435686
last-checkpoint/global_step312/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fafbe9cee36214356c8f4fa9dcd0117bb22d0f7df6a976aacc9c5e49e575f145
3
+ size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step273
 
1
+ global_step312
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa5a01ac495960a4bdf4e8e3d767478ddeae6f6e48a0785c78a39b02d9f03944
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9662f90ac871284e42fb85b7bc6f2a2f3759cac7bb678faaa94f777fedabb313
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a997267e4e5e22741250c431d4119f75a51dd8d32f691af3d6ddfcdd72fb96
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f3b2288ad04448267691f0a9a926d6ea4e1872e2d6a6114ff6a455551a1a14
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99e4fe0ad3a692bdc4f5dd7af7febcbaf52826f91569d816d162d70c3d5aae57
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7054584e190f07a5be6417b3d068feba1c297bfef11d44338e024157eac1eea
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5efce2552c4a4695a29bf25d9f63b6643d4a8ee75838c6e78f968cfeb77ced6
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699ca77f5f867262084cafd79c14bfb3ebb441808dca86d851f008f9e856702e
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82a6db6f4ce94d4baff585d4bf8aaabf2351a80d5b9ea39e0f01f54a07f8cc7d
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6107f40d55050f71acd0cf64cee3cb55f7a5f2f65beb01564a3368189c88b78a
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098920af0acbca97e22a72d86dd88a3b5fb1ee4a312e7aa98369fc3e28978653
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34860f3d27e6296937a819f44272b9427aaea2bb9384f4f850d4d368c07caa62
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e14f5793871a938571a938379ee73acad2b8bbc41260bdbfeeef799929af076
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0486011b840a4dd1cc3653fce36b9eeabdeaf4aaa824c495c5201d1b4398ad03
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c70ca795c779e38cba23bef63d56eb87a6d53fdb68031316ac8452b61a60aa3b
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8865eac23bdd917b7f787c6c0a4ad6441de4e3e64c1376b83d0e8ca0e795d614
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f96a436204d535377b5df836584c2a61915d36dc9059dc3240944efd6133bb4f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cedf4b34e868c4fb23dc007fcfc750ae8b8c1963085e5e7d64434ad8b0f1b971
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7109375,
5
  "eval_steps": 39,
6
- "global_step": 273,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1982,6 +1982,287 @@
1982
  "eval_samples_per_second": 1.219,
1983
  "eval_steps_per_second": 0.152,
1984
  "step": 273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1985
  }
1986
  ],
1987
  "logging_steps": 1,
@@ -2001,7 +2282,7 @@
2001
  "attributes": {}
2002
  }
2003
  },
2004
- "total_flos": 90247866089472.0,
2005
  "train_batch_size": 1,
2006
  "trial_name": null,
2007
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8125,
5
  "eval_steps": 39,
6
+ "global_step": 312,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1982
  "eval_samples_per_second": 1.219,
1983
  "eval_steps_per_second": 0.152,
1984
  "step": 273
1985
+ },
1986
+ {
1987
+ "epoch": 0.7135416666666666,
1988
+ "grad_norm": 0.14577207953482688,
1989
+ "learning_rate": 2.8801659352565335e-05,
1990
+ "loss": 2.3106,
1991
+ "step": 274
1992
+ },
1993
+ {
1994
+ "epoch": 0.7161458333333334,
1995
+ "grad_norm": 0.12720217228966463,
1996
+ "learning_rate": 2.848686127976829e-05,
1997
+ "loss": 2.4048,
1998
+ "step": 275
1999
+ },
2000
+ {
2001
+ "epoch": 0.71875,
2002
+ "grad_norm": 0.1727762156703378,
2003
+ "learning_rate": 2.8174038153884862e-05,
2004
+ "loss": 2.3996,
2005
+ "step": 276
2006
+ },
2007
+ {
2008
+ "epoch": 0.7213541666666666,
2009
+ "grad_norm": 0.20872850658454511,
2010
+ "learning_rate": 2.786321327691158e-05,
2011
+ "loss": 2.5836,
2012
+ "step": 277
2013
+ },
2014
+ {
2015
+ "epoch": 0.7239583333333334,
2016
+ "grad_norm": 0.1591486153830447,
2017
+ "learning_rate": 2.7554409801996723e-05,
2018
+ "loss": 2.4478,
2019
+ "step": 278
2020
+ },
2021
+ {
2022
+ "epoch": 0.7265625,
2023
+ "grad_norm": 0.14032674083135735,
2024
+ "learning_rate": 2.7247650731715564e-05,
2025
+ "loss": 2.2852,
2026
+ "step": 279
2027
+ },
2028
+ {
2029
+ "epoch": 0.7291666666666666,
2030
+ "grad_norm": 0.15626590715798697,
2031
+ "learning_rate": 2.6942958916356998e-05,
2032
+ "loss": 2.525,
2033
+ "step": 280
2034
+ },
2035
+ {
2036
+ "epoch": 0.7317708333333334,
2037
+ "grad_norm": 0.16623699130944486,
2038
+ "learning_rate": 2.66403570522214e-05,
2039
+ "loss": 2.3565,
2040
+ "step": 281
2041
+ },
2042
+ {
2043
+ "epoch": 0.734375,
2044
+ "grad_norm": 0.1449533445649136,
2045
+ "learning_rate": 2.6339867679929997e-05,
2046
+ "loss": 2.2674,
2047
+ "step": 282
2048
+ },
2049
+ {
2050
+ "epoch": 0.7369791666666666,
2051
+ "grad_norm": 0.14290681116465945,
2052
+ "learning_rate": 2.6041513182745837e-05,
2053
+ "loss": 2.1482,
2054
+ "step": 283
2055
+ },
2056
+ {
2057
+ "epoch": 0.7395833333333334,
2058
+ "grad_norm": 0.13123439654719476,
2059
+ "learning_rate": 2.574531578490651e-05,
2060
+ "loss": 2.2889,
2061
+ "step": 284
2062
+ },
2063
+ {
2064
+ "epoch": 0.7421875,
2065
+ "grad_norm": 0.1510144014374668,
2066
+ "learning_rate": 2.54512975499686e-05,
2067
+ "loss": 2.3577,
2068
+ "step": 285
2069
+ },
2070
+ {
2071
+ "epoch": 0.7447916666666666,
2072
+ "grad_norm": 0.17738379736772392,
2073
+ "learning_rate": 2.515948037916423e-05,
2074
+ "loss": 2.3591,
2075
+ "step": 286
2076
+ },
2077
+ {
2078
+ "epoch": 0.7473958333333334,
2079
+ "grad_norm": 0.14040918825453863,
2080
+ "learning_rate": 2.4869886009769657e-05,
2081
+ "loss": 2.3019,
2082
+ "step": 287
2083
+ },
2084
+ {
2085
+ "epoch": 0.75,
2086
+ "grad_norm": 0.15309282350105588,
2087
+ "learning_rate": 2.4582536013486054e-05,
2088
+ "loss": 2.3621,
2089
+ "step": 288
2090
+ },
2091
+ {
2092
+ "epoch": 0.7526041666666666,
2093
+ "grad_norm": 0.15908796843842757,
2094
+ "learning_rate": 2.429745179483272e-05,
2095
+ "loss": 2.451,
2096
+ "step": 289
2097
+ },
2098
+ {
2099
+ "epoch": 0.7552083333333334,
2100
+ "grad_norm": 0.15934279432082493,
2101
+ "learning_rate": 2.4014654589552526e-05,
2102
+ "loss": 2.4775,
2103
+ "step": 290
2104
+ },
2105
+ {
2106
+ "epoch": 0.7578125,
2107
+ "grad_norm": 0.15682626436856428,
2108
+ "learning_rate": 2.3734165463030244e-05,
2109
+ "loss": 2.4,
2110
+ "step": 291
2111
+ },
2112
+ {
2113
+ "epoch": 0.7604166666666666,
2114
+ "grad_norm": 0.1688230501684105,
2115
+ "learning_rate": 2.345600530872328e-05,
2116
+ "loss": 2.4442,
2117
+ "step": 292
2118
+ },
2119
+ {
2120
+ "epoch": 0.7630208333333334,
2121
+ "grad_norm": 0.16115250940484133,
2122
+ "learning_rate": 2.3180194846605367e-05,
2123
+ "loss": 2.4088,
2124
+ "step": 293
2125
+ },
2126
+ {
2127
+ "epoch": 0.765625,
2128
+ "grad_norm": 0.14208493985226064,
2129
+ "learning_rate": 2.290675462162318e-05,
2130
+ "loss": 2.2623,
2131
+ "step": 294
2132
+ },
2133
+ {
2134
+ "epoch": 0.7682291666666666,
2135
+ "grad_norm": 0.13703687813086513,
2136
+ "learning_rate": 2.263570500216591e-05,
2137
+ "loss": 2.486,
2138
+ "step": 295
2139
+ },
2140
+ {
2141
+ "epoch": 0.7708333333333334,
2142
+ "grad_norm": 0.16480865487948104,
2143
+ "learning_rate": 2.2367066178548072e-05,
2144
+ "loss": 2.321,
2145
+ "step": 296
2146
+ },
2147
+ {
2148
+ "epoch": 0.7734375,
2149
+ "grad_norm": 0.15426178206689278,
2150
+ "learning_rate": 2.2100858161505506e-05,
2151
+ "loss": 2.2805,
2152
+ "step": 297
2153
+ },
2154
+ {
2155
+ "epoch": 0.7760416666666666,
2156
+ "grad_norm": 0.14570862119709826,
2157
+ "learning_rate": 2.183710078070485e-05,
2158
+ "loss": 2.551,
2159
+ "step": 298
2160
+ },
2161
+ {
2162
+ "epoch": 0.7786458333333334,
2163
+ "grad_norm": 0.14473284935283884,
2164
+ "learning_rate": 2.157581368326635e-05,
2165
+ "loss": 2.1132,
2166
+ "step": 299
2167
+ },
2168
+ {
2169
+ "epoch": 0.78125,
2170
+ "grad_norm": 0.15820239745667702,
2171
+ "learning_rate": 2.1317016332300447e-05,
2172
+ "loss": 2.4685,
2173
+ "step": 300
2174
+ },
2175
+ {
2176
+ "epoch": 0.7838541666666666,
2177
+ "grad_norm": 0.14299466522368254,
2178
+ "learning_rate": 2.106072800545793e-05,
2179
+ "loss": 2.4417,
2180
+ "step": 301
2181
+ },
2182
+ {
2183
+ "epoch": 0.7864583333333334,
2184
+ "grad_norm": 0.13794584283858527,
2185
+ "learning_rate": 2.080696779349396e-05,
2186
+ "loss": 2.3209,
2187
+ "step": 302
2188
+ },
2189
+ {
2190
+ "epoch": 0.7890625,
2191
+ "grad_norm": 0.14676514709546462,
2192
+ "learning_rate": 2.0555754598846027e-05,
2193
+ "loss": 2.3155,
2194
+ "step": 303
2195
+ },
2196
+ {
2197
+ "epoch": 0.7916666666666666,
2198
+ "grad_norm": 0.1551110645608698,
2199
+ "learning_rate": 2.030710713422592e-05,
2200
+ "loss": 2.5221,
2201
+ "step": 304
2202
+ },
2203
+ {
2204
+ "epoch": 0.7942708333333334,
2205
+ "grad_norm": 0.15076270062351085,
2206
+ "learning_rate": 2.0061043921225828e-05,
2207
+ "loss": 2.3056,
2208
+ "step": 305
2209
+ },
2210
+ {
2211
+ "epoch": 0.796875,
2212
+ "grad_norm": 0.14884431911568607,
2213
+ "learning_rate": 1.981758328893866e-05,
2214
+ "loss": 2.4404,
2215
+ "step": 306
2216
+ },
2217
+ {
2218
+ "epoch": 0.7994791666666666,
2219
+ "grad_norm": 0.14993854232241055,
2220
+ "learning_rate": 1.9576743372592747e-05,
2221
+ "loss": 2.2778,
2222
+ "step": 307
2223
+ },
2224
+ {
2225
+ "epoch": 0.8020833333333334,
2226
+ "grad_norm": 0.13916032144027837,
2227
+ "learning_rate": 1.933854211220094e-05,
2228
+ "loss": 2.3168,
2229
+ "step": 308
2230
+ },
2231
+ {
2232
+ "epoch": 0.8046875,
2233
+ "grad_norm": 0.1567961212337068,
2234
+ "learning_rate": 1.9102997251224282e-05,
2235
+ "loss": 2.3217,
2236
+ "step": 309
2237
+ },
2238
+ {
2239
+ "epoch": 0.8072916666666666,
2240
+ "grad_norm": 0.14185229006162023,
2241
+ "learning_rate": 1.8870126335250293e-05,
2242
+ "loss": 2.3056,
2243
+ "step": 310
2244
+ },
2245
+ {
2246
+ "epoch": 0.8098958333333334,
2247
+ "grad_norm": 0.14083323135716713,
2248
+ "learning_rate": 1.8639946710686064e-05,
2249
+ "loss": 2.5101,
2250
+ "step": 311
2251
+ },
2252
+ {
2253
+ "epoch": 0.8125,
2254
+ "grad_norm": 0.1428611888153054,
2255
+ "learning_rate": 1.841247552346603e-05,
2256
+ "loss": 2.4123,
2257
+ "step": 312
2258
+ },
2259
+ {
2260
+ "epoch": 0.8125,
2261
+ "eval_loss": 2.398231029510498,
2262
+ "eval_runtime": 65.4627,
2263
+ "eval_samples_per_second": 1.222,
2264
+ "eval_steps_per_second": 0.153,
2265
+ "step": 312
2266
  }
2267
  ],
2268
  "logging_steps": 1,
 
2282
  "attributes": {}
2283
  }
2284
  },
2285
+ "total_flos": 103140418387968.0,
2286
  "train_batch_size": 1,
2287
  "trial_name": null,
2288
  "trial_params": null